v6.8
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	XFRM virtual interface
   4 *
   5 *	Copyright (C) 2018 secunet Security Networks AG
   6 *
   7 *	Author:
   8 *	Steffen Klassert <steffen.klassert@secunet.com>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/capability.h>
  13#include <linux/errno.h>
  14#include <linux/types.h>
  15#include <linux/sockios.h>
  16#include <linux/icmp.h>
  17#include <linux/if.h>
  18#include <linux/in.h>
  19#include <linux/ip.h>
  20#include <linux/net.h>
  21#include <linux/in6.h>
  22#include <linux/netdevice.h>
  23#include <linux/if_link.h>
  24#include <linux/if_arp.h>
  25#include <linux/icmpv6.h>
  26#include <linux/init.h>
  27#include <linux/route.h>
  28#include <linux/rtnetlink.h>
  29#include <linux/netfilter_ipv6.h>
  30#include <linux/slab.h>
  31#include <linux/hash.h>
  32
  33#include <linux/uaccess.h>
  34#include <linux/atomic.h>
  35
  36#include <net/gso.h>
  37#include <net/icmp.h>
  38#include <net/ip.h>
  39#include <net/ipv6.h>
  40#include <net/ip6_route.h>
  41#include <net/ip_tunnels.h>
  42#include <net/addrconf.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/dst_metadata.h>
  46#include <net/netns/generic.h>
  47#include <linux/etherdevice.h>
  48
  49static int xfrmi_dev_init(struct net_device *dev);
  50static void xfrmi_dev_setup(struct net_device *dev);
  51static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
  52static unsigned int xfrmi_net_id __read_mostly;
  53static const struct net_device_ops xfrmi_netdev_ops;
  54
  55#define XFRMI_HASH_BITS	8
  56#define XFRMI_HASH_SIZE	BIT(XFRMI_HASH_BITS)
  57
  58struct xfrmi_net {
  59	/* lists for storing interfaces in use */
  60	struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
  61	struct xfrm_if __rcu *collect_md_xfrmi;
  62};
  63
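/* Lightweight-tunnel (LWT) encap glue: a route may carry an xfrm if_id
 * (and optionally an output link) that is used for the xfrm lookup in
 * place of a per-device if_id, e.g. "ip route add ... encap xfrm if_id 42"
 * together with a collect_md xfrm device. */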
  64static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = {
  65	[LWT_XFRM_IF_ID]	= NLA_POLICY_MIN(NLA_U32, 1),
  66	[LWT_XFRM_LINK]		= NLA_POLICY_MIN(NLA_U32, 1),
  67};
  68
  69static void xfrmi_destroy_state(struct lwtunnel_state *lwt)
  70{
  71}
  72
  73static int xfrmi_build_state(struct net *net, struct nlattr *nla,
  74			     unsigned int family, const void *cfg,
  75			     struct lwtunnel_state **ts,
  76			     struct netlink_ext_ack *extack)
  77{
  78	struct nlattr *tb[LWT_XFRM_MAX + 1];
  79	struct lwtunnel_state *new_state;
  80	struct xfrm_md_info *info;
  81	int ret;
  82
  83	ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack);
  84	if (ret < 0)
  85		return ret;
  86
  87	if (!tb[LWT_XFRM_IF_ID]) {
  88		NL_SET_ERR_MSG(extack, "if_id must be set");
  89		return -EINVAL;
  90	}
  91
  92	new_state = lwtunnel_state_alloc(sizeof(*info));
  93	if (!new_state) {
  94		NL_SET_ERR_MSG(extack, "failed to create encap info");
  95		return -ENOMEM;
  96	}
  97
  98	new_state->type = LWTUNNEL_ENCAP_XFRM;
  99
 100	info = lwt_xfrm_info(new_state);
 101
 102	info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]);
 103
 104	if (tb[LWT_XFRM_LINK])
 105		info->link = nla_get_u32(tb[LWT_XFRM_LINK]);
 106
 107	*ts = new_state;
 108	return 0;
 109}
 110
 111static int xfrmi_fill_encap_info(struct sk_buff *skb,
 112				 struct lwtunnel_state *lwt)
 113{
 114	struct xfrm_md_info *info = lwt_xfrm_info(lwt);
 115
 116	if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) ||
 117	    (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link)))
 118		return -EMSGSIZE;
 119
 120	return 0;
 121}
 122
 123static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate)
 124{
 125	return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */
 126		nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */
 127}
 128
 129static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 130{
 131	struct xfrm_md_info *a_info = lwt_xfrm_info(a);
 132	struct xfrm_md_info *b_info = lwt_xfrm_info(b);
 133
 134	return memcmp(a_info, b_info, sizeof(*a_info));
 135}
 136
 137static const struct lwtunnel_encap_ops xfrmi_encap_ops = {
 138	.build_state	= xfrmi_build_state,
 139	.destroy_state	= xfrmi_destroy_state,
 140	.fill_encap	= xfrmi_fill_encap_info,
 141	.get_encap_size = xfrmi_encap_nlsize,
 142	.cmp_encap	= xfrmi_encap_cmp,
 143	.owner		= THIS_MODULE,
 144};
 145
 146#define for_each_xfrmi_rcu(start, xi) \
 147	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
 148
 149static u32 xfrmi_hash(u32 if_id)
 150{
 151	return hash_32(if_id, XFRMI_HASH_BITS);
 152}
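/* A standalone sketch (userspace, not part of this file) of what the
 * hash_32() call above computes: multiply by the 32-bit golden-ratio
 * constant from <linux/hash.h> and keep the top XFRMI_HASH_BITS bits,
 * which spreads even sequential if_id values across the 256 buckets. */

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_32	0x61C88647u	/* same constant as <linux/hash.h> */
#define HASH_BITS	8		/* mirrors XFRMI_HASH_BITS */

static uint32_t hash32_sketch(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	uint32_t if_id;

	for (if_id = 1; if_id <= 4; if_id++)
		printf("if_id %u -> bucket %u\n", if_id,
		       hash32_sketch(if_id, HASH_BITS));
	return 0;
}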
 153
 154static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
 155{
 156	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 157	struct xfrm_if *xi;
 158
 159	for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
 160		if (x->if_id == xi->p.if_id &&
 161		    (xi->dev->flags & IFF_UP))
 162			return xi;
 163	}
 164
 165	xi = rcu_dereference(xfrmn->collect_md_xfrmi);
 166	if (xi && (xi->dev->flags & IFF_UP))
 167		return xi;
 168
 169	return NULL;
 170}
 171
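/* xfrm_if_cb hook, called from the policy layer on input: map an
 * already-decapsulated skb back to its xfrm interface so the policy
 * lookup runs against the right netns and if_id. inet_sdif() and
 * inet6_sdif() make the lookup honour VRF (l3mdev) slave devices. */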
 172static bool xfrmi_decode_session(struct sk_buff *skb,
 173				 unsigned short family,
 174				 struct xfrm_if_decode_session_result *res)
 175{
 176	struct net_device *dev;
 177	struct xfrm_if *xi;
 178	int ifindex = 0;
 179
 180	if (!secpath_exists(skb) || !skb->dev)
 181		return false;
 182
 183	switch (family) {
 184	case AF_INET6:
 185		ifindex = inet6_sdif(skb);
 186		break;
 187	case AF_INET:
 188		ifindex = inet_sdif(skb);
 189		break;
 190	}
 191
 192	if (ifindex) {
 193		struct net *net = xs_net(xfrm_input_state(skb));
 194
 195		dev = dev_get_by_index_rcu(net, ifindex);
 196	} else {
 197		dev = skb->dev;
 198	}
 199
 200	if (!dev || !(dev->flags & IFF_UP))
 201		return false;
 202	if (dev->netdev_ops != &xfrmi_netdev_ops)
 203		return false;
 204
 205	xi = netdev_priv(dev);
 206	res->net = xi->net;
 207
 208	if (xi->p.collect_md)
 209		res->if_id = xfrm_input_state(skb)->if_id;
 210	else
 211		res->if_id = xi->p.if_id;
 212	return true;
 213}
 214
 215static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 216{
 217	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 218
  219	rcu_assign_pointer(xi->next, rtnl_dereference(*xip));
 220	rcu_assign_pointer(*xip, xi);
 221}
 222
 223static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 224{
 225	struct xfrm_if __rcu **xip;
 226	struct xfrm_if *iter;
 227
 228	for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 229	     (iter = rtnl_dereference(*xip)) != NULL;
 230	     xip = &iter->next) {
 231		if (xi == iter) {
 232			rcu_assign_pointer(*xip, xi->next);
 233			break;
 234		}
 235	}
 236}
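/* The removal loop above uses the pointer-to-pointer walk: advancing a
 * 'struct xfrm_if __rcu **' cursor lets one assignment delete either the
 * head or a middle node, with no head special case. A minimal non-RCU
 * userspace sketch of the same pattern: */

#include <stddef.h>

struct node {
	struct node *next;
};

static void unlink_node(struct node **head, struct node *victim)
{
	struct node **cursor;
	struct node *iter;

	for (cursor = head; (iter = *cursor) != NULL; cursor = &iter->next) {
		if (iter == victim) {
			/* the kernel uses rcu_assign_pointer() for this store */
			*cursor = victim->next;
			break;
		}
	}
}

int main(void)
{
	struct node c = { NULL }, b = { &c }, a = { &b };
	struct node *head = &a;

	unlink_node(&head, &b);		/* list is now a -> c */
	return (head == &a && a.next == &c) ? 0 : 1;
}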
 237
 238static void xfrmi_dev_free(struct net_device *dev)
 239{
 240	struct xfrm_if *xi = netdev_priv(dev);
 241
 242	gro_cells_destroy(&xi->gro_cells);
 243	free_percpu(dev->tstats);
 244}
 245
 246static int xfrmi_create(struct net_device *dev)
 247{
 248	struct xfrm_if *xi = netdev_priv(dev);
 249	struct net *net = dev_net(dev);
 250	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 251	int err;
 252
 253	dev->rtnl_link_ops = &xfrmi_link_ops;
 254	err = register_netdevice(dev);
 255	if (err < 0)
 256		goto out;
 257
 258	if (xi->p.collect_md)
 259		rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi);
 260	else
 261		xfrmi_link(xfrmn, xi);
 262
 263	return 0;
 264
 265out:
 266	return err;
 267}
 268
 269static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
 270{
 271	struct xfrm_if __rcu **xip;
 272	struct xfrm_if *xi;
 273	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 274
 275	for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
 276	     (xi = rtnl_dereference(*xip)) != NULL;
 277	     xip = &xi->next)
 278		if (xi->p.if_id == p->if_id)
 279			return xi;
 280
 281	return NULL;
 282}
 283
 284static void xfrmi_dev_uninit(struct net_device *dev)
 285{
 286	struct xfrm_if *xi = netdev_priv(dev);
 287	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 288
 289	if (xi->p.collect_md)
 290		RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL);
 291	else
 292		xfrmi_unlink(xfrmn, xi);
 293}
 294
 295static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
 296{
 297	skb_clear_tstamp(skb);
 298	skb->pkt_type = PACKET_HOST;
 299	skb->skb_iif = 0;
 300	skb->ignore_df = 0;
 301	skb_dst_drop(skb);
 302	nf_reset_ct(skb);
 303	nf_reset_trace(skb);
 304
 305	if (!xnet)
 306		return;
 307
 308	ipvs_reset(skb);
 309	secpath_reset(skb);
 310	skb_orphan(skb);
 311	skb->mark = 0;
 312}
 313
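/* Common input path for the ESP/AH/IPcomp handlers below: if a secpath
 * already exists, re-check inbound policy first; then record the address
 * family and clear the tunnel control block before handing the skb to
 * xfrm_input() for SPI parsing and state lookup. */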
 314static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
 315		       int encap_type, unsigned short family)
 316{
 317	struct sec_path *sp;
 318
 319	sp = skb_sec_path(skb);
 320	if (sp && (sp->len || sp->olen) &&
 321	    !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
 322		goto discard;
 323
 324	XFRM_SPI_SKB_CB(skb)->family = family;
 325	if (family == AF_INET) {
 326		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 327		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 328	} else {
 329		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 330		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
 331	}
 332
 333	return xfrm_input(skb, nexthdr, spi, encap_type);
 334discard:
 335	kfree_skb(skb);
 336	return 0;
 337}
 338
 339static int xfrmi4_rcv(struct sk_buff *skb)
 340{
 341	return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
 342}
 343
 344static int xfrmi6_rcv(struct sk_buff *skb)
 345{
 346	return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
 347			   0, 0, AF_INET6);
 348}
 349
 350static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 351{
 352	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
 353}
 354
 355static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 356{
 357	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
 358}
 359
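/* cb_handler, invoked once xfrm_input() has finished processing: steer
 * the skb to the matching xfrm device, account errors, enforce policy on
 * cross-netns delivery, and attach a metadata dst in collect_md mode. */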
 360static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 361{
 362	const struct xfrm_mode *inner_mode;
 363	struct net_device *dev;
 364	struct xfrm_state *x;
 365	struct xfrm_if *xi;
 366	bool xnet;
 367	int link;
 368
 369	if (err && !secpath_exists(skb))
 370		return 0;
 371
 372	x = xfrm_input_state(skb);
 373
 374	xi = xfrmi_lookup(xs_net(x), x);
 375	if (!xi)
 376		return 1;
 377
 378	link = skb->dev->ifindex;
 379	dev = xi->dev;
 380	skb->dev = dev;
 381
 382	if (err) {
 383		DEV_STATS_INC(dev, rx_errors);
 384		DEV_STATS_INC(dev, rx_dropped);
 385
 386		return 0;
 387	}
 388
 389	xnet = !net_eq(xi->net, dev_net(skb->dev));
 390
 391	if (xnet) {
 392		inner_mode = &x->inner_mode;
 393
 394		if (x->sel.family == AF_UNSPEC) {
 395			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
 396			if (inner_mode == NULL) {
 397				XFRM_INC_STATS(dev_net(skb->dev),
 398					       LINUX_MIB_XFRMINSTATEMODEERROR);
 399				return -EINVAL;
 400			}
 401		}
 402
 403		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
 404				       inner_mode->family))
 405			return -EPERM;
 406	}
 407
 408	xfrmi_scrub_packet(skb, xnet);
 409	if (xi->p.collect_md) {
 410		struct metadata_dst *md_dst;
 411
 412		md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC);
 413		if (!md_dst)
 414			return -ENOMEM;
 415
 416		md_dst->u.xfrm_info.if_id = x->if_id;
 417		md_dst->u.xfrm_info.link = link;
 418		skb_dst_set(skb, (struct dst_entry *)md_dst);
 419	}
 420	dev_sw_netstats_rx_add(dev, skb->len);
 421
 422	return 0;
 423}
 424
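/* Transmit slow path: resolve an xfrm bundle with the interface's if_id
 * (or the skb's metadata in collect_md mode), reject mismatched states
 * and routing loops, handle PMTU signalling, then hand the packet to
 * dst_output() on the device the bundle resolved to. */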
 425static int
 426xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 427{
 428	struct xfrm_if *xi = netdev_priv(dev);
 429	struct dst_entry *dst = skb_dst(skb);
 430	unsigned int length = skb->len;
 431	struct net_device *tdev;
 432	struct xfrm_state *x;
 433	int err = -1;
 434	u32 if_id;
 435	int mtu;
 436
 437	if (xi->p.collect_md) {
 438		struct xfrm_md_info *md_info = skb_xfrm_md_info(skb);
 439
 440		if (unlikely(!md_info))
 441			return -EINVAL;
 442
 443		if_id = md_info->if_id;
 444		fl->flowi_oif = md_info->link;
 445		if (md_info->dst_orig) {
 446			struct dst_entry *tmp_dst = dst;
 447
 448			dst = md_info->dst_orig;
 449			skb_dst_set(skb, dst);
 450			md_info->dst_orig = NULL;
 451			dst_release(tmp_dst);
 452		}
 453	} else {
 454		if_id = xi->p.if_id;
 455	}
 456
 457	dst_hold(dst);
 458	dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id);
 459	if (IS_ERR(dst)) {
 460		err = PTR_ERR(dst);
 461		dst = NULL;
 462		goto tx_err_link_failure;
 463	}
 464
 465	x = dst->xfrm;
 466	if (!x)
 467		goto tx_err_link_failure;
 468
 469	if (x->if_id != if_id)
 470		goto tx_err_link_failure;
 471
 472	tdev = dst->dev;
 473
 474	if (tdev == dev) {
 475		DEV_STATS_INC(dev, collisions);
 476		net_warn_ratelimited("%s: Local routing loop detected!\n",
 477				     dev->name);
 478		goto tx_err_dst_release;
 479	}
 480
 481	mtu = dst_mtu(dst);
 482	if ((!skb_is_gso(skb) && skb->len > mtu) ||
 483	    (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
 484		skb_dst_update_pmtu_no_confirm(skb, mtu);
 485
 486		if (skb->protocol == htons(ETH_P_IPV6)) {
 487			if (mtu < IPV6_MIN_MTU)
 488				mtu = IPV6_MIN_MTU;
 489
 490			if (skb->len > 1280)
 491				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 492			else
 493				goto xmit;
 494		} else {
 495			if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
 496				goto xmit;
 497			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 498				      htonl(mtu));
 499		}
 500
 501		dst_release(dst);
 502		return -EMSGSIZE;
 503	}
 504
 505xmit:
 506	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
 507	skb_dst_set(skb, dst);
 508	skb->dev = tdev;
 509
 510	err = dst_output(xi->net, skb->sk, skb);
 511	if (net_xmit_eval(err) == 0) {
 512		dev_sw_netstats_tx_add(dev, 1, length);
 513	} else {
 514		DEV_STATS_INC(dev, tx_errors);
 515		DEV_STATS_INC(dev, tx_aborted_errors);
 516	}
 517
 518	return 0;
 519tx_err_link_failure:
 520	DEV_STATS_INC(dev, tx_carrier_errors);
 521	dst_link_failure(skb);
 522tx_err_dst_release:
 523	dst_release(dst);
 524	return err;
 525}
 526
 527static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 528{
 529	struct xfrm_if *xi = netdev_priv(dev);
 530	struct dst_entry *dst = skb_dst(skb);
 531	struct flowi fl;
 532	int ret;
 533
 534	memset(&fl, 0, sizeof(fl));
 535
 536	switch (skb->protocol) {
 537	case htons(ETH_P_IPV6):
 538		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 539		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
 540		if (!dst) {
 541			fl.u.ip6.flowi6_oif = dev->ifindex;
 542			fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
 543			dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
 544			if (dst->error) {
 545				dst_release(dst);
 546				DEV_STATS_INC(dev, tx_carrier_errors);
 547				goto tx_err;
 548			}
 549			skb_dst_set(skb, dst);
 550		}
 551		break;
 552	case htons(ETH_P_IP):
 553		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 554		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
 555		if (!dst) {
 556			struct rtable *rt;
 557
 558			fl.u.ip4.flowi4_oif = dev->ifindex;
 559			fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
 560			rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
 561			if (IS_ERR(rt)) {
 562				DEV_STATS_INC(dev, tx_carrier_errors);
 563				goto tx_err;
 564			}
 565			skb_dst_set(skb, &rt->dst);
 566		}
 567		break;
 568	default:
 569		goto tx_err;
 570	}
 571
 572	fl.flowi_oif = xi->p.link;
 573
 574	ret = xfrmi_xmit2(skb, dev, &fl);
 575	if (ret < 0)
 576		goto tx_err;
 577
 578	return NETDEV_TX_OK;
 579
 580tx_err:
 581	DEV_STATS_INC(dev, tx_errors);
 582	DEV_STATS_INC(dev, tx_dropped);
 583	kfree_skb(skb);
 584	return NETDEV_TX_OK;
 585}
 586
 587static int xfrmi4_err(struct sk_buff *skb, u32 info)
 588{
 589	const struct iphdr *iph = (const struct iphdr *)skb->data;
 590	struct net *net = dev_net(skb->dev);
 591	int protocol = iph->protocol;
 592	struct ip_comp_hdr *ipch;
 593	struct ip_esp_hdr *esph;
  594	struct ip_auth_hdr *ah;
 595	struct xfrm_state *x;
 596	struct xfrm_if *xi;
 597	__be32 spi;
 598
 599	switch (protocol) {
 600	case IPPROTO_ESP:
 601		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
 602		spi = esph->spi;
 603		break;
 604	case IPPROTO_AH:
 605		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
 606		spi = ah->spi;
 607		break;
 608	case IPPROTO_COMP:
 609		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 610		spi = htonl(ntohs(ipch->cpi));
 611		break;
 612	default:
 613		return 0;
 614	}
 615
 616	switch (icmp_hdr(skb)->type) {
 617	case ICMP_DEST_UNREACH:
 618		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 619			return 0;
 620		break;
 621	case ICMP_REDIRECT:
 622		break;
 623	default:
 624		return 0;
 625	}
 626
 627	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
 628			      spi, protocol, AF_INET);
 629	if (!x)
 630		return 0;
 631
 632	xi = xfrmi_lookup(net, x);
 633	if (!xi) {
 634		xfrm_state_put(x);
 635		return -1;
 636	}
 637
 638	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 639		ipv4_update_pmtu(skb, net, info, 0, protocol);
 640	else
 641		ipv4_redirect(skb, net, 0, protocol);
 642	xfrm_state_put(x);
 643
 644	return 0;
 645}
 646
 647static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 648		    u8 type, u8 code, int offset, __be32 info)
 649{
 650	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
 651	struct net *net = dev_net(skb->dev);
 652	int protocol = iph->nexthdr;
 653	struct ip_comp_hdr *ipch;
 654	struct ip_esp_hdr *esph;
 655	struct ip_auth_hdr *ah;
 656	struct xfrm_state *x;
 657	struct xfrm_if *xi;
 658	__be32 spi;
 659
 660	switch (protocol) {
 661	case IPPROTO_ESP:
 662		esph = (struct ip_esp_hdr *)(skb->data + offset);
 663		spi = esph->spi;
 664		break;
 665	case IPPROTO_AH:
 666		ah = (struct ip_auth_hdr *)(skb->data + offset);
 667		spi = ah->spi;
 668		break;
 669	case IPPROTO_COMP:
 670		ipch = (struct ip_comp_hdr *)(skb->data + offset);
 671		spi = htonl(ntohs(ipch->cpi));
 672		break;
 673	default:
 674		return 0;
 675	}
 676
 677	if (type != ICMPV6_PKT_TOOBIG &&
 678	    type != NDISC_REDIRECT)
 679		return 0;
 680
 681	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
 682			      spi, protocol, AF_INET6);
 683	if (!x)
 684		return 0;
 685
 686	xi = xfrmi_lookup(net, x);
 687	if (!xi) {
 688		xfrm_state_put(x);
 689		return -1;
 690	}
 691
 692	if (type == NDISC_REDIRECT)
 693		ip6_redirect(skb, net, skb->dev->ifindex, 0,
 694			     sock_net_uid(net, NULL));
 695	else
 696		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 697	xfrm_state_put(x);
 698
 699	return 0;
 700}
 701
 702static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
 703{
 704	if (xi->p.link != p->link)
 705		return -EINVAL;
 706
 707	xi->p.if_id = p->if_id;
 708
 709	return 0;
 710}
 711
 712static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
 713{
 714	struct net *net = xi->net;
 715	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 716	int err;
 717
 718	xfrmi_unlink(xfrmn, xi);
 719	synchronize_net();
 720	err = xfrmi_change(xi, p);
 721	xfrmi_link(xfrmn, xi);
 722	netdev_state_change(xi->dev);
 723	return err;
 724}
 725
 726static int xfrmi_get_iflink(const struct net_device *dev)
 727{
 728	struct xfrm_if *xi = netdev_priv(dev);
 729
 730	return xi->p.link;
 731}
 732
 733static const struct net_device_ops xfrmi_netdev_ops = {
 734	.ndo_init	= xfrmi_dev_init,
 735	.ndo_uninit	= xfrmi_dev_uninit,
 736	.ndo_start_xmit = xfrmi_xmit,
 737	.ndo_get_stats64 = dev_get_tstats64,
 738	.ndo_get_iflink = xfrmi_get_iflink,
 739};
 740
 741static void xfrmi_dev_setup(struct net_device *dev)
 742{
  743	dev->netdev_ops	= &xfrmi_netdev_ops;
 744	dev->header_ops		= &ip_tunnel_header_ops;
 745	dev->type		= ARPHRD_NONE;
 746	dev->mtu		= ETH_DATA_LEN;
 747	dev->min_mtu		= ETH_MIN_MTU;
 748	dev->max_mtu		= IP_MAX_MTU;
  749	dev->flags		= IFF_NOARP;
 750	dev->needs_free_netdev	= true;
 751	dev->priv_destructor	= xfrmi_dev_free;
 752	netif_keep_dst(dev);
 753
 754	eth_broadcast_addr(dev->broadcast);
 755}
 756
 757#define XFRMI_FEATURES (NETIF_F_SG |		\
 758			NETIF_F_FRAGLIST |	\
 759			NETIF_F_GSO_SOFTWARE |	\
 760			NETIF_F_HW_CSUM)
 761
 762static int xfrmi_dev_init(struct net_device *dev)
 763{
 764	struct xfrm_if *xi = netdev_priv(dev);
 765	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
 766	int err;
 767
 768	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 769	if (!dev->tstats)
 770		return -ENOMEM;
 771
 772	err = gro_cells_init(&xi->gro_cells, dev);
 773	if (err) {
 774		free_percpu(dev->tstats);
 775		return err;
 776	}
 777
 778	dev->features |= NETIF_F_LLTX;
 779	dev->features |= XFRMI_FEATURES;
 780	dev->hw_features |= XFRMI_FEATURES;
 781
 782	if (phydev) {
 783		dev->needed_headroom = phydev->needed_headroom;
 784		dev->needed_tailroom = phydev->needed_tailroom;
 785
 786		if (is_zero_ether_addr(dev->dev_addr))
 787			eth_hw_addr_inherit(dev, phydev);
 788		if (is_zero_ether_addr(dev->broadcast))
 789			memcpy(dev->broadcast, phydev->broadcast,
 790			       dev->addr_len);
 791	} else {
 792		eth_hw_addr_random(dev);
 793		eth_broadcast_addr(dev->broadcast);
 794	}
 795
 796	return 0;
 797}
 798
 799static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
 800			 struct netlink_ext_ack *extack)
 801{
 802	return 0;
 803}
 804
 805static void xfrmi_netlink_parms(struct nlattr *data[],
 806			       struct xfrm_if_parms *parms)
 807{
 808	memset(parms, 0, sizeof(*parms));
 809
 810	if (!data)
 811		return;
 812
 813	if (data[IFLA_XFRM_LINK])
 814		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
 815
 816	if (data[IFLA_XFRM_IF_ID])
 817		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
 818
 819	if (data[IFLA_XFRM_COLLECT_METADATA])
 820		parms->collect_md = true;
 821}
 822
 823static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
 824			struct nlattr *tb[], struct nlattr *data[],
 825			struct netlink_ext_ack *extack)
 826{
 827	struct net *net = dev_net(dev);
 828	struct xfrm_if_parms p = {};
 829	struct xfrm_if *xi;
 830	int err;
 831
 832	xfrmi_netlink_parms(data, &p);
 833	if (p.collect_md) {
 834		struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 835
 836		if (p.link || p.if_id) {
 837			NL_SET_ERR_MSG(extack, "link and if_id must be zero");
 838			return -EINVAL;
 839		}
 840
 841		if (rtnl_dereference(xfrmn->collect_md_xfrmi))
 842			return -EEXIST;
 843
 844	} else {
 845		if (!p.if_id) {
 846			NL_SET_ERR_MSG(extack, "if_id must be non zero");
 847			return -EINVAL;
 848		}
 849
 850		xi = xfrmi_locate(net, &p);
 851		if (xi)
 852			return -EEXIST;
 853	}
 854
 855	xi = netdev_priv(dev);
 856	xi->p = p;
 857	xi->net = net;
 858	xi->dev = dev;
 859
 860	err = xfrmi_create(dev);
 861	return err;
 862}
 863
 864static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
 865{
 866	unregister_netdevice_queue(dev, head);
 867}
 868
 869static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
 870			   struct nlattr *data[],
 871			   struct netlink_ext_ack *extack)
 872{
 873	struct xfrm_if *xi = netdev_priv(dev);
 874	struct net *net = xi->net;
 875	struct xfrm_if_parms p = {};
 876
 877	xfrmi_netlink_parms(data, &p);
 878	if (!p.if_id) {
 879		NL_SET_ERR_MSG(extack, "if_id must be non zero");
 880		return -EINVAL;
 881	}
 882
 883	if (p.collect_md) {
 884		NL_SET_ERR_MSG(extack, "collect_md can't be changed");
 885		return -EINVAL;
 886	}
 887
 888	xi = xfrmi_locate(net, &p);
 889	if (!xi) {
 890		xi = netdev_priv(dev);
 891	} else {
 892		if (xi->dev != dev)
 893			return -EEXIST;
 894		if (xi->p.collect_md) {
 895			NL_SET_ERR_MSG(extack,
 896				       "device can't be changed to collect_md");
 897			return -EINVAL;
 898		}
 899	}
 900
 901	return xfrmi_update(xi, &p);
 902}
 903
 904static size_t xfrmi_get_size(const struct net_device *dev)
 905{
 906	return
 907		/* IFLA_XFRM_LINK */
 908		nla_total_size(4) +
 909		/* IFLA_XFRM_IF_ID */
 910		nla_total_size(4) +
 911		/* IFLA_XFRM_COLLECT_METADATA */
 912		nla_total_size(0) +
 913		0;
 914}
 915
 916static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
 917{
 918	struct xfrm_if *xi = netdev_priv(dev);
 919	struct xfrm_if_parms *parm = &xi->p;
 920
 921	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
 922	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) ||
 923	    (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA)))
 924		goto nla_put_failure;
 925	return 0;
 926
 927nla_put_failure:
 928	return -EMSGSIZE;
 929}
 930
 931static struct net *xfrmi_get_link_net(const struct net_device *dev)
 932{
 933	struct xfrm_if *xi = netdev_priv(dev);
 934
 935	return xi->net;
 936}
 937
 938static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
 939	[IFLA_XFRM_UNSPEC]		= { .strict_start_type = IFLA_XFRM_COLLECT_METADATA },
 940	[IFLA_XFRM_LINK]		= { .type = NLA_U32 },
 941	[IFLA_XFRM_IF_ID]		= { .type = NLA_U32 },
 942	[IFLA_XFRM_COLLECT_METADATA]	= { .type = NLA_FLAG },
 943};
 944
 945static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
 946	.kind		= "xfrm",
 947	.maxtype	= IFLA_XFRM_MAX,
 948	.policy		= xfrmi_policy,
 949	.priv_size	= sizeof(struct xfrm_if),
 950	.setup		= xfrmi_dev_setup,
 951	.validate	= xfrmi_validate,
 952	.newlink	= xfrmi_newlink,
 953	.dellink	= xfrmi_dellink,
 954	.changelink	= xfrmi_changelink,
 955	.get_size	= xfrmi_get_size,
 956	.fill_info	= xfrmi_fill_info,
 957	.get_link_net	= xfrmi_get_link_net,
 958};
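/* A hypothetical standalone helper (not part of this file) showing what
 * the rtnl_link_ops above accepts over raw rtnetlink; roughly equivalent
 * to "ip link add xfrmi0 type xfrm dev eth0 if_id 42". The name "xfrmi0",
 * the underlay "eth0" and if_id 42 are illustrative assumptions, and the
 * ACK handling is abbreviated. */

#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <net/if.h>

static void put_attr(struct nlmsghdr *n, unsigned short type,
		     const void *data, unsigned short len)
{
	struct rtattr *rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));

	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	if (len)
		memcpy(RTA_DATA(rta), data, len);
	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

static struct rtattr *nest_begin(struct nlmsghdr *n, unsigned short type)
{
	struct rtattr *rta = (struct rtattr *)((char *)n + NLMSG_ALIGN(n->nlmsg_len));

	put_attr(n, type, NULL, 0);
	return rta;
}

static void nest_end(struct nlmsghdr *n, struct rtattr *rta)
{
	rta->rta_len = (char *)n + NLMSG_ALIGN(n->nlmsg_len) - (char *)rta;
}

int main(void)
{
	struct {
		struct nlmsghdr nh;
		char attrs[496];
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct nlmsghdr *n = &req.nh;
	struct rtattr *linkinfo, *data;
	struct ifinfomsg *ifi;
	uint32_t if_id = 42;			/* example value */
	uint32_t link = if_nametoindex("eth0");	/* assumed underlay device */
	int fd;

	memset(&req, 0, sizeof(req));
	n->nlmsg_len = NLMSG_LENGTH(sizeof(*ifi));
	n->nlmsg_type = RTM_NEWLINK;
	n->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
	ifi = NLMSG_DATA(n);
	ifi->ifi_family = AF_UNSPEC;

	put_attr(n, IFLA_IFNAME, "xfrmi0", sizeof("xfrmi0"));
	linkinfo = nest_begin(n, IFLA_LINKINFO);
	put_attr(n, IFLA_INFO_KIND, "xfrm", 4);
	data = nest_begin(n, IFLA_INFO_DATA);
	put_attr(n, IFLA_XFRM_LINK, &link, sizeof(link));
	put_attr(n, IFLA_XFRM_IF_ID, &if_id, sizeof(if_id));
	nest_end(n, data);
	nest_end(n, linkinfo);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || sendto(fd, n, n->nlmsg_len, 0,
			     (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		return 1;
	/* reading the kernel's ACK (NLMSG_ERROR) is omitted for brevity */
	close(fd);
	return 0;
}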
 959
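/* Batched per-netns teardown: under RTNL, queue every hashed interface
 * plus the collect_md one onto a local list and unregister them together
 * with unregister_netdevice_many(). */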
 960static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
 961{
 962	struct net *net;
 963	LIST_HEAD(list);
 964
 965	rtnl_lock();
 966	list_for_each_entry(net, net_exit_list, exit_list) {
 967		struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 968		struct xfrm_if __rcu **xip;
 969		struct xfrm_if *xi;
 970		int i;
 971
 972		for (i = 0; i < XFRMI_HASH_SIZE; i++) {
 973			for (xip = &xfrmn->xfrmi[i];
 974			     (xi = rtnl_dereference(*xip)) != NULL;
 975			     xip = &xi->next)
 976				unregister_netdevice_queue(xi->dev, &list);
 977		}
 978		xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
 979		if (xi)
 980			unregister_netdevice_queue(xi->dev, &list);
 981	}
 982	unregister_netdevice_many(&list);
 983	rtnl_unlock();
 984}
 985
 986static struct pernet_operations xfrmi_net_ops = {
 987	.exit_batch = xfrmi_exit_batch_net,
 988	.id   = &xfrmi_net_id,
 989	.size = sizeof(struct xfrmi_net),
 990};
 991
 992static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
 993	.handler	=	xfrmi6_rcv,
 994	.input_handler	=	xfrmi6_input,
 995	.cb_handler	=	xfrmi_rcv_cb,
 996	.err_handler	=	xfrmi6_err,
 997	.priority	=	10,
 998};
 999
1000static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
1001	.handler	=	xfrm6_rcv,
1002	.input_handler	=	xfrm_input,
1003	.cb_handler	=	xfrmi_rcv_cb,
1004	.err_handler	=	xfrmi6_err,
1005	.priority	=	10,
1006};
1007
1008static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
1009	.handler	=	xfrm6_rcv,
1010	.input_handler	=	xfrm_input,
1011	.cb_handler	=	xfrmi_rcv_cb,
1012	.err_handler	=	xfrmi6_err,
1013	.priority	=	10,
1014};
1015
1016#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1017static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
1018{
1019	const xfrm_address_t *saddr;
1020	__be32 spi;
1021
1022	saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
1023	spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
1024
1025	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
1026}
1027
1028static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
1029	.handler	=	xfrmi6_rcv_tunnel,
1030	.cb_handler	=	xfrmi_rcv_cb,
1031	.err_handler	=	xfrmi6_err,
1032	.priority	=	2,
1033};
1034
1035static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
1036	.handler	=	xfrmi6_rcv_tunnel,
1037	.cb_handler	=	xfrmi_rcv_cb,
1038	.err_handler	=	xfrmi6_err,
1039	.priority	=	2,
1040};
1041#endif
1042
1043static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
1044	.handler	=	xfrmi4_rcv,
1045	.input_handler	=	xfrmi4_input,
1046	.cb_handler	=	xfrmi_rcv_cb,
1047	.err_handler	=	xfrmi4_err,
1048	.priority	=	10,
1049};
1050
1051static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
1052	.handler	=	xfrm4_rcv,
1053	.input_handler	=	xfrm_input,
1054	.cb_handler	=	xfrmi_rcv_cb,
1055	.err_handler	=	xfrmi4_err,
1056	.priority	=	10,
1057};
1058
1059static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
1060	.handler	=	xfrm4_rcv,
1061	.input_handler	=	xfrm_input,
1062	.cb_handler	=	xfrmi_rcv_cb,
1063	.err_handler	=	xfrmi4_err,
1064	.priority	=	10,
1065};
1066
1067#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1068static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
1069{
1070	return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
1071}
1072
1073static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
1074	.handler	=	xfrmi4_rcv_tunnel,
1075	.cb_handler	=	xfrmi_rcv_cb,
1076	.err_handler	=	xfrmi4_err,
1077	.priority	=	3,
1078};
1079
1080static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
1081	.handler	=	xfrmi4_rcv_tunnel,
1082	.cb_handler	=	xfrmi_rcv_cb,
1083	.err_handler	=	xfrmi4_err,
1084	.priority	=	2,
1085};
1086#endif
1087
1088static int __init xfrmi4_init(void)
1089{
1090	int err;
1091
1092	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
1093	if (err < 0)
1094		goto xfrm_proto_esp_failed;
1095	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
1096	if (err < 0)
1097		goto xfrm_proto_ah_failed;
1098	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1099	if (err < 0)
1100		goto xfrm_proto_comp_failed;
1101#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1102	err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
1103	if (err < 0)
1104		goto xfrm_tunnel_ipip_failed;
1105	err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
1106	if (err < 0)
1107		goto xfrm_tunnel_ipip6_failed;
1108#endif
1109
1110	return 0;
1111
1112#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1113xfrm_tunnel_ipip6_failed:
1114	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
1115xfrm_tunnel_ipip_failed:
1116	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1117#endif
1118xfrm_proto_comp_failed:
1119	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
1120xfrm_proto_ah_failed:
1121	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
1122xfrm_proto_esp_failed:
1123	return err;
1124}
1125
1126static void xfrmi4_fini(void)
1127{
1128#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1129	xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
1130	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
1131#endif
1132	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1133	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
1134	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
1135}
1136
1137static int __init xfrmi6_init(void)
1138{
1139	int err;
1140
1141	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
1142	if (err < 0)
1143		goto xfrm_proto_esp_failed;
1144	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
1145	if (err < 0)
1146		goto xfrm_proto_ah_failed;
1147	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1148	if (err < 0)
1149		goto xfrm_proto_comp_failed;
1150#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1151	err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
1152	if (err < 0)
1153		goto xfrm_tunnel_ipv6_failed;
1154	err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
1155	if (err < 0)
1156		goto xfrm_tunnel_ip6ip_failed;
1157#endif
1158
1159	return 0;
1160
1161#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1162xfrm_tunnel_ip6ip_failed:
1163	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
1164xfrm_tunnel_ipv6_failed:
1165	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1166#endif
1167xfrm_proto_comp_failed:
1168	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
1169xfrm_proto_ah_failed:
1170	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
1171xfrm_proto_esp_failed:
1172	return err;
1173}
1174
1175static void xfrmi6_fini(void)
1176{
1177#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1178	xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
1179	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
1180#endif
1181	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1182	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
1183	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
1184}
1185
1186static const struct xfrm_if_cb xfrm_if_cb = {
1187	.decode_session =	xfrmi_decode_session,
1188};
1189
1190static int __init xfrmi_init(void)
1191{
1192	const char *msg;
1193	int err;
1194
1195	pr_info("IPsec XFRM device driver\n");
1196
1197	msg = "tunnel device";
1198	err = register_pernet_device(&xfrmi_net_ops);
1199	if (err < 0)
1200		goto pernet_dev_failed;
1201
1202	msg = "xfrm4 protocols";
1203	err = xfrmi4_init();
1204	if (err < 0)
1205		goto xfrmi4_failed;
1206
1207	msg = "xfrm6 protocols";
1208	err = xfrmi6_init();
1209	if (err < 0)
1210		goto xfrmi6_failed;
1211
1212
1213	msg = "netlink interface";
1214	err = rtnl_link_register(&xfrmi_link_ops);
1215	if (err < 0)
1216		goto rtnl_link_failed;
1217
1218	err = register_xfrm_interface_bpf();
1219	if (err < 0)
1220		goto kfunc_failed;
1221
1222	lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
1223
1224	xfrm_if_register_cb(&xfrm_if_cb);
1225
1226	return err;
1227
1228kfunc_failed:
1229	rtnl_link_unregister(&xfrmi_link_ops);
1230rtnl_link_failed:
1231	xfrmi6_fini();
1232xfrmi6_failed:
1233	xfrmi4_fini();
1234xfrmi4_failed:
1235	unregister_pernet_device(&xfrmi_net_ops);
1236pernet_dev_failed:
1237	pr_err("xfrmi init: failed to register %s\n", msg);
1238	return err;
1239}
1240
1241static void __exit xfrmi_fini(void)
1242{
1243	xfrm_if_unregister_cb();
1244	lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
1245	rtnl_link_unregister(&xfrmi_link_ops);
1246	xfrmi4_fini();
1247	xfrmi6_fini();
1248	unregister_pernet_device(&xfrmi_net_ops);
1249}
1250
1251module_init(xfrmi_init);
1252module_exit(xfrmi_fini);
1253MODULE_LICENSE("GPL");
1254MODULE_ALIAS_RTNL_LINK("xfrm");
1255MODULE_ALIAS_NETDEV("xfrm0");
1256MODULE_AUTHOR("Steffen Klassert");
1257MODULE_DESCRIPTION("XFRM virtual interface");
v6.13.7
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	XFRM virtual interface
   4 *
   5 *	Copyright (C) 2018 secunet Security Networks AG
   6 *
   7 *	Author:
   8 *	Steffen Klassert <steffen.klassert@secunet.com>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/capability.h>
  13#include <linux/errno.h>
  14#include <linux/types.h>
  15#include <linux/sockios.h>
  16#include <linux/icmp.h>
  17#include <linux/if.h>
  18#include <linux/in.h>
  19#include <linux/ip.h>
  20#include <linux/net.h>
  21#include <linux/in6.h>
  22#include <linux/netdevice.h>
  23#include <linux/if_link.h>
  24#include <linux/if_arp.h>
  25#include <linux/icmpv6.h>
  26#include <linux/init.h>
  27#include <linux/route.h>
  28#include <linux/rtnetlink.h>
  29#include <linux/netfilter_ipv6.h>
  30#include <linux/slab.h>
  31#include <linux/hash.h>
  32
  33#include <linux/uaccess.h>
  34#include <linux/atomic.h>
  35
  36#include <net/gso.h>
  37#include <net/icmp.h>
  38#include <net/ip.h>
  39#include <net/ipv6.h>
  40#include <net/ip6_route.h>
  41#include <net/ip_tunnels.h>
  42#include <net/addrconf.h>
  43#include <net/xfrm.h>
  44#include <net/net_namespace.h>
  45#include <net/dst_metadata.h>
  46#include <net/netns/generic.h>
  47#include <linux/etherdevice.h>
  48
  49static int xfrmi_dev_init(struct net_device *dev);
  50static void xfrmi_dev_setup(struct net_device *dev);
  51static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
  52static unsigned int xfrmi_net_id __read_mostly;
  53static const struct net_device_ops xfrmi_netdev_ops;
  54
  55#define XFRMI_HASH_BITS	8
  56#define XFRMI_HASH_SIZE	BIT(XFRMI_HASH_BITS)
  57
  58struct xfrmi_net {
  59	/* lists for storing interfaces in use */
  60	struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
  61	struct xfrm_if __rcu *collect_md_xfrmi;
  62};
  63
  64static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = {
  65	[LWT_XFRM_IF_ID]	= NLA_POLICY_MIN(NLA_U32, 1),
  66	[LWT_XFRM_LINK]		= NLA_POLICY_MIN(NLA_U32, 1),
  67};
  68
  69static void xfrmi_destroy_state(struct lwtunnel_state *lwt)
  70{
  71}
  72
  73static int xfrmi_build_state(struct net *net, struct nlattr *nla,
  74			     unsigned int family, const void *cfg,
  75			     struct lwtunnel_state **ts,
  76			     struct netlink_ext_ack *extack)
  77{
  78	struct nlattr *tb[LWT_XFRM_MAX + 1];
  79	struct lwtunnel_state *new_state;
  80	struct xfrm_md_info *info;
  81	int ret;
  82
  83	ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack);
  84	if (ret < 0)
  85		return ret;
  86
  87	if (!tb[LWT_XFRM_IF_ID]) {
  88		NL_SET_ERR_MSG(extack, "if_id must be set");
  89		return -EINVAL;
  90	}
  91
  92	new_state = lwtunnel_state_alloc(sizeof(*info));
  93	if (!new_state) {
  94		NL_SET_ERR_MSG(extack, "failed to create encap info");
  95		return -ENOMEM;
  96	}
  97
  98	new_state->type = LWTUNNEL_ENCAP_XFRM;
  99
 100	info = lwt_xfrm_info(new_state);
 101
 102	info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]);
 103
 104	if (tb[LWT_XFRM_LINK])
 105		info->link = nla_get_u32(tb[LWT_XFRM_LINK]);
 106
 107	*ts = new_state;
 108	return 0;
 109}
 110
 111static int xfrmi_fill_encap_info(struct sk_buff *skb,
 112				 struct lwtunnel_state *lwt)
 113{
 114	struct xfrm_md_info *info = lwt_xfrm_info(lwt);
 115
 116	if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) ||
 117	    (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link)))
 118		return -EMSGSIZE;
 119
 120	return 0;
 121}
 122
 123static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate)
 124{
 125	return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */
 126		nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */
 127}
 128
 129static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
 130{
 131	struct xfrm_md_info *a_info = lwt_xfrm_info(a);
 132	struct xfrm_md_info *b_info = lwt_xfrm_info(b);
 133
 134	return memcmp(a_info, b_info, sizeof(*a_info));
 135}
 136
 137static const struct lwtunnel_encap_ops xfrmi_encap_ops = {
 138	.build_state	= xfrmi_build_state,
 139	.destroy_state	= xfrmi_destroy_state,
 140	.fill_encap	= xfrmi_fill_encap_info,
 141	.get_encap_size = xfrmi_encap_nlsize,
 142	.cmp_encap	= xfrmi_encap_cmp,
 143	.owner		= THIS_MODULE,
 144};
 145
 146#define for_each_xfrmi_rcu(start, xi) \
 147	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
 148
 149static u32 xfrmi_hash(u32 if_id)
 150{
 151	return hash_32(if_id, XFRMI_HASH_BITS);
 152}
 153
 154static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
 155{
 156	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 157	struct xfrm_if *xi;
 158
 159	for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
 160		if (x->if_id == xi->p.if_id &&
 161		    (xi->dev->flags & IFF_UP))
 162			return xi;
 163	}
 164
 165	xi = rcu_dereference(xfrmn->collect_md_xfrmi);
 166	if (xi && (xi->dev->flags & IFF_UP))
 167		return xi;
 168
 169	return NULL;
 170}
 171
 172static bool xfrmi_decode_session(struct sk_buff *skb,
 173				 unsigned short family,
 174				 struct xfrm_if_decode_session_result *res)
 175{
 176	struct net_device *dev;
 177	struct xfrm_if *xi;
 178	int ifindex = 0;
 179
 180	if (!secpath_exists(skb) || !skb->dev)
 181		return false;
 182
 183	switch (family) {
 184	case AF_INET6:
 185		ifindex = inet6_sdif(skb);
 186		break;
 187	case AF_INET:
 188		ifindex = inet_sdif(skb);
 189		break;
 190	}
 191
 192	if (ifindex) {
 193		struct net *net = xs_net(xfrm_input_state(skb));
 194
 195		dev = dev_get_by_index_rcu(net, ifindex);
 196	} else {
 197		dev = skb->dev;
 198	}
 199
 200	if (!dev || !(dev->flags & IFF_UP))
 201		return false;
 202	if (dev->netdev_ops != &xfrmi_netdev_ops)
 203		return false;
 204
 205	xi = netdev_priv(dev);
 206	res->net = xi->net;
 207
 208	if (xi->p.collect_md)
 209		res->if_id = xfrm_input_state(skb)->if_id;
 210	else
 211		res->if_id = xi->p.if_id;
 212	return true;
 213}
 214
 215static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 216{
 217	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 218
  219	rcu_assign_pointer(xi->next, rtnl_dereference(*xip));
 220	rcu_assign_pointer(*xip, xi);
 221}
 222
 223static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 224{
 225	struct xfrm_if __rcu **xip;
 226	struct xfrm_if *iter;
 227
 228	for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 229	     (iter = rtnl_dereference(*xip)) != NULL;
 230	     xip = &iter->next) {
 231		if (xi == iter) {
 232			rcu_assign_pointer(*xip, xi->next);
 233			break;
 234		}
 235	}
 236}
 237
 238static void xfrmi_dev_free(struct net_device *dev)
 239{
 240	struct xfrm_if *xi = netdev_priv(dev);
 241
 242	gro_cells_destroy(&xi->gro_cells);
 243}
 244
 245static int xfrmi_create(struct net_device *dev)
 246{
 247	struct xfrm_if *xi = netdev_priv(dev);
 248	struct net *net = dev_net(dev);
 249	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 250	int err;
 251
 252	dev->rtnl_link_ops = &xfrmi_link_ops;
 253	err = register_netdevice(dev);
 254	if (err < 0)
 255		goto out;
 256
 257	if (xi->p.collect_md)
 258		rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi);
 259	else
 260		xfrmi_link(xfrmn, xi);
 261
 262	return 0;
 263
 264out:
 265	return err;
 266}
 267
 268static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
 269{
 270	struct xfrm_if __rcu **xip;
 271	struct xfrm_if *xi;
 272	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 273
 274	for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
 275	     (xi = rtnl_dereference(*xip)) != NULL;
 276	     xip = &xi->next)
 277		if (xi->p.if_id == p->if_id)
 278			return xi;
 279
 280	return NULL;
 281}
 282
 283static void xfrmi_dev_uninit(struct net_device *dev)
 284{
 285	struct xfrm_if *xi = netdev_priv(dev);
 286	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 287
 288	if (xi->p.collect_md)
 289		RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL);
 290	else
 291		xfrmi_unlink(xfrmn, xi);
 292}
 293
 294static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
 295{
 296	skb_clear_tstamp(skb);
 297	skb->pkt_type = PACKET_HOST;
 298	skb->skb_iif = 0;
 299	skb->ignore_df = 0;
 300	skb_dst_drop(skb);
 301	nf_reset_ct(skb);
 302	nf_reset_trace(skb);
 303
 304	if (!xnet)
 305		return;
 306
 307	ipvs_reset(skb);
 308	secpath_reset(skb);
 309	skb_orphan(skb);
 310	skb->mark = 0;
 311}
 312
 313static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
 314		       int encap_type, unsigned short family)
 315{
 316	struct sec_path *sp;
 317
 318	sp = skb_sec_path(skb);
 319	if (sp && (sp->len || sp->olen) &&
 320	    !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
 321		goto discard;
 322
 323	XFRM_SPI_SKB_CB(skb)->family = family;
 324	if (family == AF_INET) {
 325		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 326		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
 327	} else {
 328		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 329		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
 330	}
 331
 332	return xfrm_input(skb, nexthdr, spi, encap_type);
 333discard:
 334	kfree_skb(skb);
 335	return 0;
 336}
 337
 338static int xfrmi4_rcv(struct sk_buff *skb)
 339{
 340	return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
 341}
 342
 343static int xfrmi6_rcv(struct sk_buff *skb)
 344{
 345	return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
 346			   0, 0, AF_INET6);
 347}
 348
 349static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 350{
 351	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
 352}
 353
 354static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 355{
 356	return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
 357}
 358
 359static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 360{
 361	const struct xfrm_mode *inner_mode;
 362	struct net_device *dev;
 363	struct xfrm_state *x;
 364	struct xfrm_if *xi;
 365	bool xnet;
 366	int link;
 367
 368	if (err && !secpath_exists(skb))
 369		return 0;
 370
 371	x = xfrm_input_state(skb);
 372
 373	xi = xfrmi_lookup(xs_net(x), x);
 374	if (!xi)
 375		return 1;
 376
 377	link = skb->dev->ifindex;
 378	dev = xi->dev;
 379	skb->dev = dev;
 380
 381	if (err) {
 382		DEV_STATS_INC(dev, rx_errors);
 383		DEV_STATS_INC(dev, rx_dropped);
 384
 385		return 0;
 386	}
 387
 388	xnet = !net_eq(xi->net, dev_net(skb->dev));
 389
 390	if (xnet) {
 391		inner_mode = &x->inner_mode;
 392
 393		if (x->sel.family == AF_UNSPEC) {
 394			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
 395			if (inner_mode == NULL) {
 396				XFRM_INC_STATS(dev_net(skb->dev),
 397					       LINUX_MIB_XFRMINSTATEMODEERROR);
 398				return -EINVAL;
 399			}
 400		}
 401
 402		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
 403				       inner_mode->family))
 404			return -EPERM;
 405	}
 406
 407	xfrmi_scrub_packet(skb, xnet);
 408	if (xi->p.collect_md) {
 409		struct metadata_dst *md_dst;
 410
 411		md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC);
 412		if (!md_dst)
 413			return -ENOMEM;
 414
 415		md_dst->u.xfrm_info.if_id = x->if_id;
 416		md_dst->u.xfrm_info.link = link;
 417		skb_dst_set(skb, (struct dst_entry *)md_dst);
 418	}
 419	dev_sw_netstats_rx_add(dev, skb->len);
 420
 421	return 0;
 422}
 423
 424static int
 425xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 426{
 427	struct xfrm_if *xi = netdev_priv(dev);
 428	struct dst_entry *dst = skb_dst(skb);
 429	unsigned int length = skb->len;
 430	struct net_device *tdev;
 431	struct xfrm_state *x;
 432	int err = -1;
 433	u32 if_id;
 434	int mtu;
 435
 436	if (xi->p.collect_md) {
 437		struct xfrm_md_info *md_info = skb_xfrm_md_info(skb);
 438
 439		if (unlikely(!md_info))
 440			return -EINVAL;
 441
 442		if_id = md_info->if_id;
 443		fl->flowi_oif = md_info->link;
 444		if (md_info->dst_orig) {
 445			struct dst_entry *tmp_dst = dst;
 446
 447			dst = md_info->dst_orig;
 448			skb_dst_set(skb, dst);
 449			md_info->dst_orig = NULL;
 450			dst_release(tmp_dst);
 451		}
 452	} else {
 453		if_id = xi->p.if_id;
 454	}
 455
 456	dst_hold(dst);
 457	dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id);
 458	if (IS_ERR(dst)) {
 459		err = PTR_ERR(dst);
 460		dst = NULL;
 461		goto tx_err_link_failure;
 462	}
 463
 464	x = dst->xfrm;
 465	if (!x)
 466		goto tx_err_link_failure;
 467
 468	if (x->if_id != if_id)
 469		goto tx_err_link_failure;
 470
 471	tdev = dst->dev;
 472
 473	if (tdev == dev) {
 474		DEV_STATS_INC(dev, collisions);
 475		net_warn_ratelimited("%s: Local routing loop detected!\n",
 476				     dev->name);
 477		goto tx_err_dst_release;
 478	}
 479
 480	mtu = dst_mtu(dst);
 481	if ((!skb_is_gso(skb) && skb->len > mtu) ||
 482	    (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) {
 483		skb_dst_update_pmtu_no_confirm(skb, mtu);
 484
 485		if (skb->protocol == htons(ETH_P_IPV6)) {
 486			if (mtu < IPV6_MIN_MTU)
 487				mtu = IPV6_MIN_MTU;
 488
 489			if (skb->len > 1280)
 490				icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 491			else
 492				goto xmit;
 493		} else {
 494			if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
 495				goto xmit;
 496			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
 497				      htonl(mtu));
 498		}
 499
 500		dst_release(dst);
 501		return -EMSGSIZE;
 502	}
 503
 504xmit:
 505	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
 506	skb_dst_set(skb, dst);
 507	skb->dev = tdev;
 508
 509	err = dst_output(xi->net, skb->sk, skb);
 510	if (net_xmit_eval(err) == 0) {
 511		dev_sw_netstats_tx_add(dev, 1, length);
 512	} else {
 513		DEV_STATS_INC(dev, tx_errors);
 514		DEV_STATS_INC(dev, tx_aborted_errors);
 515	}
 516
 517	return 0;
 518tx_err_link_failure:
 519	DEV_STATS_INC(dev, tx_carrier_errors);
 520	dst_link_failure(skb);
 521tx_err_dst_release:
 522	dst_release(dst);
 523	return err;
 524}
 525
 526static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 527{
 528	struct xfrm_if *xi = netdev_priv(dev);
 529	struct dst_entry *dst = skb_dst(skb);
 530	struct flowi fl;
 531	int ret;
 532
 533	memset(&fl, 0, sizeof(fl));
 534
 535	switch (skb->protocol) {
 536	case htons(ETH_P_IPV6):
 537		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 538		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
 539		if (!dst) {
 540			fl.u.ip6.flowi6_oif = dev->ifindex;
 541			fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
 542			dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
 543			if (dst->error) {
 544				dst_release(dst);
 545				DEV_STATS_INC(dev, tx_carrier_errors);
 546				goto tx_err;
 547			}
 548			skb_dst_set(skb, dst);
 549		}
 550		break;
 551	case htons(ETH_P_IP):
 552		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 553		xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
 554		if (!dst) {
 555			struct rtable *rt;
 556
 557			fl.u.ip4.flowi4_oif = dev->ifindex;
 558			fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
 559			rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
 560			if (IS_ERR(rt)) {
 561				DEV_STATS_INC(dev, tx_carrier_errors);
 562				goto tx_err;
 563			}
 564			skb_dst_set(skb, &rt->dst);
 565		}
 566		break;
 567	default:
 568		goto tx_err;
 569	}
 570
 571	fl.flowi_oif = xi->p.link;
 572
 573	ret = xfrmi_xmit2(skb, dev, &fl);
 574	if (ret < 0)
 575		goto tx_err;
 576
 577	return NETDEV_TX_OK;
 578
 579tx_err:
 580	DEV_STATS_INC(dev, tx_errors);
 581	DEV_STATS_INC(dev, tx_dropped);
 582	kfree_skb(skb);
 583	return NETDEV_TX_OK;
 584}
 585
 586static int xfrmi4_err(struct sk_buff *skb, u32 info)
 587{
 588	const struct iphdr *iph = (const struct iphdr *)skb->data;
 589	struct net *net = dev_net(skb->dev);
 590	int protocol = iph->protocol;
 591	struct ip_comp_hdr *ipch;
 592	struct ip_esp_hdr *esph;
  593	struct ip_auth_hdr *ah;
 594	struct xfrm_state *x;
 595	struct xfrm_if *xi;
 596	__be32 spi;
 597
 598	switch (protocol) {
 599	case IPPROTO_ESP:
 600		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
 601		spi = esph->spi;
 602		break;
 603	case IPPROTO_AH:
 604		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
 605		spi = ah->spi;
 606		break;
 607	case IPPROTO_COMP:
 608		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
 609		spi = htonl(ntohs(ipch->cpi));
 610		break;
 611	default:
 612		return 0;
 613	}
 614
 615	switch (icmp_hdr(skb)->type) {
 616	case ICMP_DEST_UNREACH:
 617		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 618			return 0;
 619		break;
 620	case ICMP_REDIRECT:
 621		break;
 622	default:
 623		return 0;
 624	}
 625
 626	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
 627			      spi, protocol, AF_INET);
 628	if (!x)
 629		return 0;
 630
 631	xi = xfrmi_lookup(net, x);
 632	if (!xi) {
 633		xfrm_state_put(x);
 634		return -1;
 635	}
 636
 637	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 638		ipv4_update_pmtu(skb, net, info, 0, protocol);
 639	else
 640		ipv4_redirect(skb, net, 0, protocol);
 641	xfrm_state_put(x);
 642
 643	return 0;
 644}
 645
 646static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 647		    u8 type, u8 code, int offset, __be32 info)
 648{
 649	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
 650	struct net *net = dev_net(skb->dev);
 651	int protocol = iph->nexthdr;
 652	struct ip_comp_hdr *ipch;
 653	struct ip_esp_hdr *esph;
 654	struct ip_auth_hdr *ah;
 655	struct xfrm_state *x;
 656	struct xfrm_if *xi;
 657	__be32 spi;
 658
 659	switch (protocol) {
 660	case IPPROTO_ESP:
 661		esph = (struct ip_esp_hdr *)(skb->data + offset);
 662		spi = esph->spi;
 663		break;
 664	case IPPROTO_AH:
 665		ah = (struct ip_auth_hdr *)(skb->data + offset);
 666		spi = ah->spi;
 667		break;
 668	case IPPROTO_COMP:
 669		ipch = (struct ip_comp_hdr *)(skb->data + offset);
 670		spi = htonl(ntohs(ipch->cpi));
 671		break;
 672	default:
 673		return 0;
 674	}
 675
 676	if (type != ICMPV6_PKT_TOOBIG &&
 677	    type != NDISC_REDIRECT)
 678		return 0;
 679
 680	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
 681			      spi, protocol, AF_INET6);
 682	if (!x)
 683		return 0;
 684
 685	xi = xfrmi_lookup(net, x);
 686	if (!xi) {
 687		xfrm_state_put(x);
 688		return -1;
 689	}
 690
 691	if (type == NDISC_REDIRECT)
 692		ip6_redirect(skb, net, skb->dev->ifindex, 0,
 693			     sock_net_uid(net, NULL));
 694	else
 695		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
 696	xfrm_state_put(x);
 697
 698	return 0;
 699}
 700
 701static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
 702{
 703	if (xi->p.link != p->link)
 704		return -EINVAL;
 705
 706	xi->p.if_id = p->if_id;
 707
 708	return 0;
 709}
 710
 711static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
 712{
 713	struct net *net = xi->net;
 714	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 715	int err;
 716
 717	xfrmi_unlink(xfrmn, xi);
 718	synchronize_net();
 719	err = xfrmi_change(xi, p);
 720	xfrmi_link(xfrmn, xi);
 721	netdev_state_change(xi->dev);
 722	return err;
 723}
 724
 725static int xfrmi_get_iflink(const struct net_device *dev)
 726{
 727	struct xfrm_if *xi = netdev_priv(dev);
 728
 729	return READ_ONCE(xi->p.link);
 730}
 731
 732static const struct net_device_ops xfrmi_netdev_ops = {
 733	.ndo_init	= xfrmi_dev_init,
 734	.ndo_uninit	= xfrmi_dev_uninit,
 735	.ndo_start_xmit = xfrmi_xmit,
 736	.ndo_get_stats64 = dev_get_tstats64,
 737	.ndo_get_iflink = xfrmi_get_iflink,
 738};
 739
 740static void xfrmi_dev_setup(struct net_device *dev)
 741{
  742	dev->netdev_ops	= &xfrmi_netdev_ops;
 743	dev->header_ops		= &ip_tunnel_header_ops;
 744	dev->type		= ARPHRD_NONE;
 745	dev->mtu		= ETH_DATA_LEN;
 746	dev->min_mtu		= ETH_MIN_MTU;
 747	dev->max_mtu		= IP_MAX_MTU;
  748	dev->flags		= IFF_NOARP;
 749	dev->needs_free_netdev	= true;
 750	dev->priv_destructor	= xfrmi_dev_free;
 751	dev->pcpu_stat_type	= NETDEV_PCPU_STAT_TSTATS;
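	/* With NETDEV_PCPU_STAT_TSTATS the core allocates and frees
	 * dev->tstats itself, so ndo_init and the destructor no longer
	 * manage the per-CPU counters by hand as the v6.8 code above did. */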
 752	netif_keep_dst(dev);
 753
 754	eth_broadcast_addr(dev->broadcast);
 755}
 756
 757#define XFRMI_FEATURES (NETIF_F_SG |		\
 758			NETIF_F_FRAGLIST |	\
 759			NETIF_F_GSO_SOFTWARE |	\
 760			NETIF_F_HW_CSUM)
 761
 762static int xfrmi_dev_init(struct net_device *dev)
 763{
 764	struct xfrm_if *xi = netdev_priv(dev);
 765	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
 766	int err;
 767
 768	err = gro_cells_init(&xi->gro_cells, dev);
 769	if (err)
 770		return err;
 771
 772	dev->lltx = true;
 773	dev->features |= XFRMI_FEATURES;
 774	dev->hw_features |= XFRMI_FEATURES;
 775
 776	if (phydev) {
 777		dev->needed_headroom = phydev->needed_headroom;
 778		dev->needed_tailroom = phydev->needed_tailroom;
 779
 780		if (is_zero_ether_addr(dev->dev_addr))
 781			eth_hw_addr_inherit(dev, phydev);
 782		if (is_zero_ether_addr(dev->broadcast))
 783			memcpy(dev->broadcast, phydev->broadcast,
 784			       dev->addr_len);
 785	} else {
 786		eth_hw_addr_random(dev);
 787		eth_broadcast_addr(dev->broadcast);
 788	}
 789
 790	return 0;
 791}
 792
 793static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
 794			 struct netlink_ext_ack *extack)
 795{
 796	return 0;
 797}
 798
 799static void xfrmi_netlink_parms(struct nlattr *data[],
 800			       struct xfrm_if_parms *parms)
 801{
 802	memset(parms, 0, sizeof(*parms));
 803
 804	if (!data)
 805		return;
 806
 807	if (data[IFLA_XFRM_LINK])
 808		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
 809
 810	if (data[IFLA_XFRM_IF_ID])
 811		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
 812
 813	if (data[IFLA_XFRM_COLLECT_METADATA])
 814		parms->collect_md = true;
 815}
 816
 817static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
 818			struct nlattr *tb[], struct nlattr *data[],
 819			struct netlink_ext_ack *extack)
 820{
 821	struct net *net = dev_net(dev);
 822	struct xfrm_if_parms p = {};
 823	struct xfrm_if *xi;
 824	int err;
 825
 826	xfrmi_netlink_parms(data, &p);
 827	if (p.collect_md) {
 828		struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 829
 830		if (p.link || p.if_id) {
 831			NL_SET_ERR_MSG(extack, "link and if_id must be zero");
 832			return -EINVAL;
 833		}
 834
 835		if (rtnl_dereference(xfrmn->collect_md_xfrmi))
 836			return -EEXIST;
 837
 838	} else {
 839		if (!p.if_id) {
 840			NL_SET_ERR_MSG(extack, "if_id must be non zero");
 841			return -EINVAL;
 842		}
 843
 844		xi = xfrmi_locate(net, &p);
 845		if (xi)
 846			return -EEXIST;
 847	}
 848
 849	xi = netdev_priv(dev);
 850	xi->p = p;
 851	xi->net = net;
 852	xi->dev = dev;
 853
 854	err = xfrmi_create(dev);
 855	return err;
 856}
 857
 858static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
 859{
 860	unregister_netdevice_queue(dev, head);
 861}
 862
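/*
 * changelink: if_id must remain non-zero and collect_md cannot be
 * toggled; an if_id already owned by another device is -EEXIST.
 */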
 863static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
 864			   struct nlattr *data[],
 865			   struct netlink_ext_ack *extack)
 866{
 867	struct xfrm_if *xi = netdev_priv(dev);
 868	struct net *net = xi->net;
 869	struct xfrm_if_parms p = {};
 870
 871	xfrmi_netlink_parms(data, &p);
 872	if (!p.if_id) {
 873		NL_SET_ERR_MSG(extack, "if_id must be non zero");
 874		return -EINVAL;
 875	}
 876
 877	if (p.collect_md) {
 878		NL_SET_ERR_MSG(extack, "collect_md can't be changed");
 879		return -EINVAL;
 880	}
 881
 882	xi = xfrmi_locate(net, &p);
 883	if (!xi) {
 884		xi = netdev_priv(dev);
 885	} else {
 886		if (xi->dev != dev)
 887			return -EEXIST;
 888		if (xi->p.collect_md) {
 889			NL_SET_ERR_MSG(extack,
 890				       "device can't be changed to collect_md");
 891			return -EINVAL;
 892		}
 893	}
 894
 895	return xfrmi_update(xi, &p);
 896}
 897
 898static size_t xfrmi_get_size(const struct net_device *dev)
 899{
 900	return
 901		/* IFLA_XFRM_LINK */
 902		nla_total_size(4) +
 903		/* IFLA_XFRM_IF_ID */
 904		nla_total_size(4) +
 905		/* IFLA_XFRM_COLLECT_METADATA */
 906		nla_total_size(0) +
 907		0;
 908}
 909
 910static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
 911{
 912	struct xfrm_if *xi = netdev_priv(dev);
 913	struct xfrm_if_parms *parm = &xi->p;
 914
 915	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
 916	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) ||
 917	    (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA)))
 918		goto nla_put_failure;
 919	return 0;
 920
 921nla_put_failure:
 922	return -EMSGSIZE;
 923}
 924
 925static struct net *xfrmi_get_link_net(const struct net_device *dev)
 926{
 927	struct xfrm_if *xi = netdev_priv(dev);
 928
 929	return READ_ONCE(xi->net);
 930}
 931
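/*
 * strict_start_type: COLLECT_METADATA and any later attributes are
 * always validated strictly, even for requests parsed liberally.
 */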
 932static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
 933	[IFLA_XFRM_UNSPEC]		= { .strict_start_type = IFLA_XFRM_COLLECT_METADATA },
 934	[IFLA_XFRM_LINK]		= { .type = NLA_U32 },
 935	[IFLA_XFRM_IF_ID]		= { .type = NLA_U32 },
 936	[IFLA_XFRM_COLLECT_METADATA]	= { .type = NLA_FLAG },
 937};
 938
 939static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
 940	.kind		= "xfrm",
 941	.maxtype	= IFLA_XFRM_MAX,
 942	.policy		= xfrmi_policy,
 943	.priv_size	= sizeof(struct xfrm_if),
 944	.setup		= xfrmi_dev_setup,
 945	.validate	= xfrmi_validate,
 946	.newlink	= xfrmi_newlink,
 947	.dellink	= xfrmi_dellink,
 948	.changelink	= xfrmi_changelink,
 949	.get_size	= xfrmi_get_size,
 950	.fill_info	= xfrmi_fill_info,
 951	.get_link_net	= xfrmi_get_link_net,
 952};
 953
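/*
 * Per-netns exit under RTNL: queue every hashed device, plus the
 * optional collect_md device, for batched unregistration.
 */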
 954static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
 955					     struct list_head *dev_to_kill)
 956{
 957	struct net *net;
 958
 959	ASSERT_RTNL();
 960	list_for_each_entry(net, net_exit_list, exit_list) {
 961		struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 962		struct xfrm_if __rcu **xip;
 963		struct xfrm_if *xi;
 964		int i;
 965
 966		for (i = 0; i < XFRMI_HASH_SIZE; i++) {
 967			for (xip = &xfrmn->xfrmi[i];
 968			     (xi = rtnl_dereference(*xip)) != NULL;
 969			     xip = &xi->next)
 970				unregister_netdevice_queue(xi->dev, dev_to_kill);
 971		}
 972		xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
 973		if (xi)
 974			unregister_netdevice_queue(xi->dev, dev_to_kill);
 975	}
 976}
 977
 978static struct pernet_operations xfrmi_net_ops = {
 979	.exit_batch_rtnl = xfrmi_exit_batch_rtnl,
 980	.id   = &xfrmi_net_id,
 981	.size = sizeof(struct xfrmi_net),
 982};
 983
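/*
 * IPv6 ESP/AH/IPcomp receive hooks. Priority 10 places them ahead of
 * the base protocol handlers, so xfrmi_rcv_cb() can steer packets to
 * the matching interface first.
 */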
 984static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
 985	.handler	=	xfrmi6_rcv,
 986	.input_handler	=	xfrmi6_input,
 987	.cb_handler	=	xfrmi_rcv_cb,
 988	.err_handler	=	xfrmi6_err,
 989	.priority	=	10,
 990};
 991
 992static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
 993	.handler	=	xfrm6_rcv,
 994	.input_handler	=	xfrm_input,
 995	.cb_handler	=	xfrmi_rcv_cb,
 996	.err_handler	=	xfrmi6_err,
 997	.priority	=	10,
 998};
 999
1000static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
1001	.handler	=	xfrm6_rcv,
1002	.input_handler	=	xfrm_input,
1003	.cb_handler	=	xfrmi_rcv_cb,
1004	.err_handler	=	xfrmi6_err,
1005	.priority	=	10,
1006};
1007
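/*
 * Tunnel-mode input for IPv6 outer headers: the SPI is derived from
 * the outer source address via xfrm6_tunnel_spi_lookup().
 */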
1008#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1009static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
1010{
1011	const xfrm_address_t *saddr;
1012	__be32 spi;
1013
1014	saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
1015	spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);
1016
1017	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
1018}
1019
1020static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
1021	.handler	=	xfrmi6_rcv_tunnel,
1022	.cb_handler	=	xfrmi_rcv_cb,
1023	.err_handler	=	xfrmi6_err,
1024	.priority	=	2,
1025};
1026
1027static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
1028	.handler	=	xfrmi6_rcv_tunnel,
1029	.cb_handler	=	xfrmi_rcv_cb,
1030	.err_handler	=	xfrmi6_err,
1031	.priority	=	2,
1032};
1033#endif
1034
1035static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
1036	.handler	=	xfrmi4_rcv,
1037	.input_handler	=	xfrmi4_input,
1038	.cb_handler	=	xfrmi_rcv_cb,
1039	.err_handler	=	xfrmi4_err,
1040	.priority	=	10,
1041};
1042
1043static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
1044	.handler	=	xfrm4_rcv,
1045	.input_handler	=	xfrm_input,
1046	.cb_handler	=	xfrmi_rcv_cb,
1047	.err_handler	=	xfrmi4_err,
1048	.priority	=	10,
1049};
1050
1051static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
1052	.handler	=	xfrm4_rcv,
1053	.input_handler	=	xfrm_input,
1054	.cb_handler	=	xfrmi_rcv_cb,
1055	.err_handler	=	xfrmi4_err,
1056	.priority	=	10,
1057};
1058
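/*
 * Tunnel-mode input for IPv4 outer headers: the outer source address
 * itself is used as the SPI for the state lookup.
 */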
1059#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1060static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
1061{
1062	return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
1063}
1064
1065static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
1066	.handler	=	xfrmi4_rcv_tunnel,
1067	.cb_handler	=	xfrmi_rcv_cb,
1068	.err_handler	=	xfrmi4_err,
1069	.priority	=	3,
1070};
1071
1072static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
1073	.handler	=	xfrmi4_rcv_tunnel,
1074	.cb_handler	=	xfrmi_rcv_cb,
1075	.err_handler	=	xfrmi4_err,
1076	.priority	=	2,
1077};
1078#endif
1079
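/*
 * Register the IPv4 protocol and (optionally) tunnel handlers; on any
 * failure, the goto ladder below unwinds in reverse order.
 */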
1080static int __init xfrmi4_init(void)
1081{
1082	int err;
1083
1084	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
1085	if (err < 0)
1086		goto xfrm_proto_esp_failed;
1087	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
1088	if (err < 0)
1089		goto xfrm_proto_ah_failed;
1090	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1091	if (err < 0)
1092		goto xfrm_proto_comp_failed;
1093#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1094	err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
1095	if (err < 0)
1096		goto xfrm_tunnel_ipip_failed;
1097	err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
1098	if (err < 0)
1099		goto xfrm_tunnel_ipip6_failed;
1100#endif
1101
1102	return 0;
1103
1104#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1105xfrm_tunnel_ipip6_failed:
1106	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
1107xfrm_tunnel_ipip_failed:
1108	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1109#endif
1110xfrm_proto_comp_failed:
1111	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
1112xfrm_proto_ah_failed:
1113	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
1114xfrm_proto_esp_failed:
1115	return err;
1116}
1117
1118static void xfrmi4_fini(void)
1119{
1120#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
1121	xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
1122	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
1123#endif
1124	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
1125	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
1126	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
1127}
1128
1129static int __init xfrmi6_init(void)
1130{
1131	int err;
1132
1133	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
1134	if (err < 0)
1135		goto xfrm_proto_esp_failed;
1136	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
1137	if (err < 0)
1138		goto xfrm_proto_ah_failed;
1139	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1140	if (err < 0)
1141		goto xfrm_proto_comp_failed;
1142#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1143	err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
1144	if (err < 0)
1145		goto xfrm_tunnel_ipv6_failed;
1146	err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
1147	if (err < 0)
1148		goto xfrm_tunnel_ip6ip_failed;
1149#endif
1150
1151	return 0;
1152
1153#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1154xfrm_tunnel_ip6ip_failed:
1155	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
1156xfrm_tunnel_ipv6_failed:
1157	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1158#endif
1159xfrm_proto_comp_failed:
1160	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
1161xfrm_proto_ah_failed:
1162	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
1163xfrm_proto_esp_failed:
1164	return err;
1165}
1166
1167static void xfrmi6_fini(void)
1168{
1169#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
1170	xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
1171	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
1172#endif
1173	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
1174	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
1175	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
1176}
1177
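/* Hooks xfrmi_decode_session() into the core xfrm lookup path. */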
1178static const struct xfrm_if_cb xfrm_if_cb = {
1179	.decode_session =	xfrmi_decode_session,
1180};
1181
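/*
 * Module init: pernet state, protocol handlers, rtnl link ops, BPF
 * kfuncs, then the lwtunnel ops and the xfrm_if callback; xfrmi_fini()
 * tears everything down in roughly the reverse order.
 */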
1182static int __init xfrmi_init(void)
1183{
1184	const char *msg;
1185	int err;
1186
1187	pr_info("IPsec XFRM device driver\n");
1188
1189	msg = "tunnel device";
1190	err = register_pernet_device(&xfrmi_net_ops);
1191	if (err < 0)
1192		goto pernet_dev_failed;
1193
1194	msg = "xfrm4 protocols";
1195	err = xfrmi4_init();
1196	if (err < 0)
1197		goto xfrmi4_failed;
1198
1199	msg = "xfrm6 protocols";
1200	err = xfrmi6_init();
1201	if (err < 0)
1202		goto xfrmi6_failed;
1203
1204
1205	msg = "netlink interface";
1206	err = rtnl_link_register(&xfrmi_link_ops);
1207	if (err < 0)
1208		goto rtnl_link_failed;
1209
1210	err = register_xfrm_interface_bpf();
1211	if (err < 0)
1212		goto kfunc_failed;
1213
1214	lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
1215
1216	xfrm_if_register_cb(&xfrm_if_cb);
1217
1218	return err;
1219
1220kfunc_failed:
1221	rtnl_link_unregister(&xfrmi_link_ops);
1222rtnl_link_failed:
1223	xfrmi6_fini();
1224xfrmi6_failed:
1225	xfrmi4_fini();
1226xfrmi4_failed:
1227	unregister_pernet_device(&xfrmi_net_ops);
1228pernet_dev_failed:
1229	pr_err("xfrmi init: failed to register %s\n", msg);
1230	return err;
1231}
1232
1233static void __exit xfrmi_fini(void)
1234{
1235	xfrm_if_unregister_cb();
1236	lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
1237	rtnl_link_unregister(&xfrmi_link_ops);
1238	xfrmi4_fini();
1239	xfrmi6_fini();
1240	unregister_pernet_device(&xfrmi_net_ops);
1241}
1242
1243module_init(xfrmi_init);
1244module_exit(xfrmi_fini);
1245MODULE_LICENSE("GPL");
1246MODULE_ALIAS_RTNL_LINK("xfrm");
1247MODULE_ALIAS_NETDEV("xfrm0");
1248MODULE_AUTHOR("Steffen Klassert");
1249MODULE_DESCRIPTION("XFRM virtual interface");