Scraped source listing of net/xfrm/xfrm_interface.c.
First copy below: Linux v5.4. A second copy (Linux v5.9, truncated) follows further down.
  1// SPDX-License-Identifier: GPL-2.0
  2/*
  3 *	XFRM virtual interface
  4 *
  5 *	Copyright (C) 2018 secunet Security Networks AG
  6 *
  7 *	Author:
  8 *	Steffen Klassert <steffen.klassert@secunet.com>
  9 */
 10
 11#include <linux/module.h>
 12#include <linux/capability.h>
 13#include <linux/errno.h>
 14#include <linux/types.h>
 15#include <linux/sockios.h>
 16#include <linux/icmp.h>
 17#include <linux/if.h>
 18#include <linux/in.h>
 19#include <linux/ip.h>
 20#include <linux/net.h>
 21#include <linux/in6.h>
 22#include <linux/netdevice.h>
 23#include <linux/if_link.h>
 24#include <linux/if_arp.h>
 25#include <linux/icmpv6.h>
 26#include <linux/init.h>
 27#include <linux/route.h>
 28#include <linux/rtnetlink.h>
 29#include <linux/netfilter_ipv6.h>
 30#include <linux/slab.h>
 31#include <linux/hash.h>
 32
 33#include <linux/uaccess.h>
 34#include <linux/atomic.h>
 35
 36#include <net/icmp.h>
 37#include <net/ip.h>
 38#include <net/ipv6.h>
 39#include <net/ip6_route.h>
 
 40#include <net/addrconf.h>
 41#include <net/xfrm.h>
 42#include <net/net_namespace.h>
 43#include <net/netns/generic.h>
 44#include <linux/etherdevice.h>
 45
 46static int xfrmi_dev_init(struct net_device *dev);
 47static void xfrmi_dev_setup(struct net_device *dev);
 48static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
 49static unsigned int xfrmi_net_id __read_mostly;
 
 
 
 
 50
 51struct xfrmi_net {
 52	/* lists for storing interfaces in use */
 53	struct xfrm_if __rcu *xfrmi[1];
 54};
 55
 56#define for_each_xfrmi_rcu(start, xi) \
 57	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
 58
 
 
 
 
 
 59static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
 60{
 61	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 62	struct xfrm_if *xi;
 63
 64	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
 65		if (x->if_id == xi->p.if_id &&
 66		    (xi->dev->flags & IFF_UP))
 67			return xi;
 68	}
 69
 70	return NULL;
 71}
 72
 73static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
 74					    unsigned short family)
 75{
 76	struct xfrmi_net *xfrmn;
 77	struct xfrm_if *xi;
 78	int ifindex = 0;
 79
 80	if (!secpath_exists(skb) || !skb->dev)
 81		return NULL;
 82
 83	switch (family) {
 84	case AF_INET6:
 85		ifindex = inet6_sdif(skb);
 86		break;
 87	case AF_INET:
 88		ifindex = inet_sdif(skb);
 89		break;
 90	}
 91	if (!ifindex)
 92		ifindex = skb->dev->ifindex;
 93
 94	xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
 
 95
 96	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
 97		if (ifindex == xi->dev->ifindex &&
 98			(xi->dev->flags & IFF_UP))
 99				return xi;
100	}
101
102	return NULL;
 
 
 
 
 
103}
104
105static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
106{
107	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[0];
108
109	rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
110	rcu_assign_pointer(*xip, xi);
111}
112
113static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
114{
115	struct xfrm_if __rcu **xip;
116	struct xfrm_if *iter;
117
118	for (xip = &xfrmn->xfrmi[0];
119	     (iter = rtnl_dereference(*xip)) != NULL;
120	     xip = &iter->next) {
121		if (xi == iter) {
122			rcu_assign_pointer(*xip, xi->next);
123			break;
124		}
125	}
126}
127
128static void xfrmi_dev_free(struct net_device *dev)
129{
130	struct xfrm_if *xi = netdev_priv(dev);
131
132	gro_cells_destroy(&xi->gro_cells);
133	free_percpu(dev->tstats);
134}
135
136static int xfrmi_create(struct net_device *dev)
137{
138	struct xfrm_if *xi = netdev_priv(dev);
139	struct net *net = dev_net(dev);
140	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
141	int err;
142
143	dev->rtnl_link_ops = &xfrmi_link_ops;
144	err = register_netdevice(dev);
145	if (err < 0)
146		goto out;
147
148	dev_hold(dev);
149	xfrmi_link(xfrmn, xi);
150
151	return 0;
152
153out:
154	return err;
155}
156
157static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
158{
159	struct xfrm_if __rcu **xip;
160	struct xfrm_if *xi;
161	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
162
163	for (xip = &xfrmn->xfrmi[0];
164	     (xi = rtnl_dereference(*xip)) != NULL;
165	     xip = &xi->next)
166		if (xi->p.if_id == p->if_id)
167			return xi;
168
169	return NULL;
170}
171
172static void xfrmi_dev_uninit(struct net_device *dev)
173{
174	struct xfrm_if *xi = netdev_priv(dev);
175	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
176
177	xfrmi_unlink(xfrmn, xi);
178	dev_put(dev);
179}
180
181static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
182{
183	skb->tstamp = 0;
184	skb->pkt_type = PACKET_HOST;
185	skb->skb_iif = 0;
186	skb->ignore_df = 0;
187	skb_dst_drop(skb);
188	nf_reset_ct(skb);
189	nf_reset_trace(skb);
190
191	if (!xnet)
192		return;
193
194	ipvs_reset(skb);
195	secpath_reset(skb);
196	skb_orphan(skb);
197	skb->mark = 0;
198}
199
200static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
201{
202	const struct xfrm_mode *inner_mode;
203	struct pcpu_sw_netstats *tstats;
204	struct net_device *dev;
205	struct xfrm_state *x;
206	struct xfrm_if *xi;
207	bool xnet;
208
209	if (err && !secpath_exists(skb))
210		return 0;
211
212	x = xfrm_input_state(skb);
213
214	xi = xfrmi_lookup(xs_net(x), x);
215	if (!xi)
216		return 1;
217
218	dev = xi->dev;
219	skb->dev = dev;
220
221	if (err) {
222		dev->stats.rx_errors++;
223		dev->stats.rx_dropped++;
224
225		return 0;
226	}
227
228	xnet = !net_eq(xi->net, dev_net(skb->dev));
229
230	if (xnet) {
231		inner_mode = &x->inner_mode;
232
233		if (x->sel.family == AF_UNSPEC) {
234			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
235			if (inner_mode == NULL) {
236				XFRM_INC_STATS(dev_net(skb->dev),
237					       LINUX_MIB_XFRMINSTATEMODEERROR);
238				return -EINVAL;
239			}
240		}
241
242		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
243				       inner_mode->family))
244			return -EPERM;
245	}
246
247	xfrmi_scrub_packet(skb, xnet);
248
249	tstats = this_cpu_ptr(dev->tstats);
250
251	u64_stats_update_begin(&tstats->syncp);
252	tstats->rx_packets++;
253	tstats->rx_bytes += skb->len;
254	u64_stats_update_end(&tstats->syncp);
255
256	return 0;
257}
258
259static int
260xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
261{
262	struct xfrm_if *xi = netdev_priv(dev);
263	struct net_device_stats *stats = &xi->dev->stats;
264	struct dst_entry *dst = skb_dst(skb);
265	unsigned int length = skb->len;
266	struct net_device *tdev;
267	struct xfrm_state *x;
268	int err = -1;
269	int mtu;
270
271	if (!dst)
272		goto tx_err_link_failure;
273
274	dst_hold(dst);
275	dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
276	if (IS_ERR(dst)) {
277		err = PTR_ERR(dst);
278		dst = NULL;
279		goto tx_err_link_failure;
280	}
281
282	x = dst->xfrm;
283	if (!x)
284		goto tx_err_link_failure;
285
286	if (x->if_id != xi->p.if_id)
287		goto tx_err_link_failure;
288
289	tdev = dst->dev;
290
291	if (tdev == dev) {
292		stats->collisions++;
293		net_warn_ratelimited("%s: Local routing loop detected!\n",
294				     dev->name);
295		goto tx_err_dst_release;
296	}
297
298	mtu = dst_mtu(dst);
299	if (!skb->ignore_df && skb->len > mtu) {
300		skb_dst_update_pmtu(skb, mtu);
301
302		if (skb->protocol == htons(ETH_P_IPV6)) {
303			if (mtu < IPV6_MIN_MTU)
304				mtu = IPV6_MIN_MTU;
305
306			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
307		} else {
308			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
309				  htonl(mtu));
310		}
311
312		dst_release(dst);
313		return -EMSGSIZE;
314	}
315
316	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
317	skb_dst_set(skb, dst);
318	skb->dev = tdev;
319
320	err = dst_output(xi->net, skb->sk, skb);
321	if (net_xmit_eval(err) == 0) {
322		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
323
324		u64_stats_update_begin(&tstats->syncp);
325		tstats->tx_bytes += length;
326		tstats->tx_packets++;
327		u64_stats_update_end(&tstats->syncp);
328	} else {
329		stats->tx_errors++;
330		stats->tx_aborted_errors++;
331	}
332
333	return 0;
334tx_err_link_failure:
335	stats->tx_carrier_errors++;
336	dst_link_failure(skb);
337tx_err_dst_release:
338	dst_release(dst);
339	return err;
340}
341
342static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
343{
344	struct xfrm_if *xi = netdev_priv(dev);
345	struct net_device_stats *stats = &xi->dev->stats;
 
346	struct flowi fl;
347	int ret;
348
349	memset(&fl, 0, sizeof(fl));
350
351	switch (skb->protocol) {
352	case htons(ETH_P_IPV6):
353		xfrm_decode_session(skb, &fl, AF_INET6);
354		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
 
 
 
 
 
 
 
 
 
 
355		break;
356	case htons(ETH_P_IP):
357		xfrm_decode_session(skb, &fl, AF_INET);
358		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
 
 
 
 
 
 
 
 
 
 
 
359		break;
360	default:
361		goto tx_err;
362	}
363
364	fl.flowi_oif = xi->p.link;
365
366	ret = xfrmi_xmit2(skb, dev, &fl);
367	if (ret < 0)
368		goto tx_err;
369
370	return NETDEV_TX_OK;
371
372tx_err:
373	stats->tx_errors++;
374	stats->tx_dropped++;
375	kfree_skb(skb);
376	return NETDEV_TX_OK;
377}
378
379static int xfrmi4_err(struct sk_buff *skb, u32 info)
380{
381	const struct iphdr *iph = (const struct iphdr *)skb->data;
382	struct net *net = dev_net(skb->dev);
383	int protocol = iph->protocol;
384	struct ip_comp_hdr *ipch;
385	struct ip_esp_hdr *esph;
386	struct ip_auth_hdr *ah ;
387	struct xfrm_state *x;
388	struct xfrm_if *xi;
389	__be32 spi;
390
391	switch (protocol) {
392	case IPPROTO_ESP:
393		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
394		spi = esph->spi;
395		break;
396	case IPPROTO_AH:
397		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
398		spi = ah->spi;
399		break;
400	case IPPROTO_COMP:
401		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
402		spi = htonl(ntohs(ipch->cpi));
403		break;
404	default:
405		return 0;
406	}
407
408	switch (icmp_hdr(skb)->type) {
409	case ICMP_DEST_UNREACH:
410		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
411			return 0;
412	case ICMP_REDIRECT:
413		break;
414	default:
415		return 0;
416	}
417
418	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
419			      spi, protocol, AF_INET);
420	if (!x)
421		return 0;
422
423	xi = xfrmi_lookup(net, x);
424	if (!xi) {
425		xfrm_state_put(x);
426		return -1;
427	}
428
429	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
430		ipv4_update_pmtu(skb, net, info, 0, protocol);
431	else
432		ipv4_redirect(skb, net, 0, protocol);
433	xfrm_state_put(x);
434
435	return 0;
436}
437
438static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
439		    u8 type, u8 code, int offset, __be32 info)
440{
441	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
442	struct net *net = dev_net(skb->dev);
443	int protocol = iph->nexthdr;
444	struct ip_comp_hdr *ipch;
445	struct ip_esp_hdr *esph;
446	struct ip_auth_hdr *ah;
447	struct xfrm_state *x;
448	struct xfrm_if *xi;
449	__be32 spi;
450
451	switch (protocol) {
452	case IPPROTO_ESP:
453		esph = (struct ip_esp_hdr *)(skb->data + offset);
454		spi = esph->spi;
455		break;
456	case IPPROTO_AH:
457		ah = (struct ip_auth_hdr *)(skb->data + offset);
458		spi = ah->spi;
459		break;
460	case IPPROTO_COMP:
461		ipch = (struct ip_comp_hdr *)(skb->data + offset);
462		spi = htonl(ntohs(ipch->cpi));
463		break;
464	default:
465		return 0;
466	}
467
468	if (type != ICMPV6_PKT_TOOBIG &&
469	    type != NDISC_REDIRECT)
470		return 0;
471
472	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
473			      spi, protocol, AF_INET6);
474	if (!x)
475		return 0;
476
477	xi = xfrmi_lookup(net, x);
478	if (!xi) {
479		xfrm_state_put(x);
480		return -1;
481	}
482
483	if (type == NDISC_REDIRECT)
484		ip6_redirect(skb, net, skb->dev->ifindex, 0,
485			     sock_net_uid(net, NULL));
486	else
487		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
488	xfrm_state_put(x);
489
490	return 0;
491}
492
493static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
494{
495	if (xi->p.link != p->link)
496		return -EINVAL;
497
498	xi->p.if_id = p->if_id;
499
500	return 0;
501}
502
503static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
504{
505	struct net *net = xi->net;
506	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
507	int err;
508
509	xfrmi_unlink(xfrmn, xi);
510	synchronize_net();
511	err = xfrmi_change(xi, p);
512	xfrmi_link(xfrmn, xi);
513	netdev_state_change(xi->dev);
514	return err;
515}
516
517static void xfrmi_get_stats64(struct net_device *dev,
518			       struct rtnl_link_stats64 *s)
519{
520	int cpu;
521
522	for_each_possible_cpu(cpu) {
523		struct pcpu_sw_netstats *stats;
524		struct pcpu_sw_netstats tmp;
525		int start;
526
527		stats = per_cpu_ptr(dev->tstats, cpu);
528		do {
529			start = u64_stats_fetch_begin_irq(&stats->syncp);
530			tmp.rx_packets = stats->rx_packets;
531			tmp.rx_bytes   = stats->rx_bytes;
532			tmp.tx_packets = stats->tx_packets;
533			tmp.tx_bytes   = stats->tx_bytes;
534		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
535
536		s->rx_packets += tmp.rx_packets;
537		s->rx_bytes   += tmp.rx_bytes;
538		s->tx_packets += tmp.tx_packets;
539		s->tx_bytes   += tmp.tx_bytes;
540	}
541
542	s->rx_dropped = dev->stats.rx_dropped;
543	s->tx_dropped = dev->stats.tx_dropped;
544}
545
546static int xfrmi_get_iflink(const struct net_device *dev)
547{
548	struct xfrm_if *xi = netdev_priv(dev);
549
550	return xi->p.link;
551}
552
553
554static const struct net_device_ops xfrmi_netdev_ops = {
555	.ndo_init	= xfrmi_dev_init,
556	.ndo_uninit	= xfrmi_dev_uninit,
557	.ndo_start_xmit = xfrmi_xmit,
558	.ndo_get_stats64 = xfrmi_get_stats64,
559	.ndo_get_iflink = xfrmi_get_iflink,
560};
561
562static void xfrmi_dev_setup(struct net_device *dev)
563{
564	dev->netdev_ops 	= &xfrmi_netdev_ops;
 
565	dev->type		= ARPHRD_NONE;
566	dev->hard_header_len 	= ETH_HLEN;
567	dev->min_header_len	= ETH_HLEN;
568	dev->mtu		= ETH_DATA_LEN;
569	dev->min_mtu		= ETH_MIN_MTU;
570	dev->max_mtu		= ETH_DATA_LEN;
571	dev->addr_len		= ETH_ALEN;
572	dev->flags 		= IFF_NOARP;
573	dev->needs_free_netdev	= true;
574	dev->priv_destructor	= xfrmi_dev_free;
575	netif_keep_dst(dev);
576
577	eth_broadcast_addr(dev->broadcast);
578}
579
580static int xfrmi_dev_init(struct net_device *dev)
581{
582	struct xfrm_if *xi = netdev_priv(dev);
583	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
584	int err;
585
586	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
587	if (!dev->tstats)
588		return -ENOMEM;
589
590	err = gro_cells_init(&xi->gro_cells, dev);
591	if (err) {
592		free_percpu(dev->tstats);
593		return err;
594	}
595
596	dev->features |= NETIF_F_LLTX;
597
598	if (phydev) {
599		dev->needed_headroom = phydev->needed_headroom;
600		dev->needed_tailroom = phydev->needed_tailroom;
601
602		if (is_zero_ether_addr(dev->dev_addr))
603			eth_hw_addr_inherit(dev, phydev);
604		if (is_zero_ether_addr(dev->broadcast))
605			memcpy(dev->broadcast, phydev->broadcast,
606			       dev->addr_len);
607	} else {
608		eth_hw_addr_random(dev);
609		eth_broadcast_addr(dev->broadcast);
610	}
611
612	return 0;
613}
614
615static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
616			 struct netlink_ext_ack *extack)
617{
618	return 0;
619}
620
621static void xfrmi_netlink_parms(struct nlattr *data[],
622			       struct xfrm_if_parms *parms)
623{
624	memset(parms, 0, sizeof(*parms));
625
626	if (!data)
627		return;
628
629	if (data[IFLA_XFRM_LINK])
630		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);
631
632	if (data[IFLA_XFRM_IF_ID])
633		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
634}
635
636static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
637			struct nlattr *tb[], struct nlattr *data[],
638			struct netlink_ext_ack *extack)
639{
640	struct net *net = dev_net(dev);
641	struct xfrm_if_parms p;
642	struct xfrm_if *xi;
643	int err;
644
645	xfrmi_netlink_parms(data, &p);
646	xi = xfrmi_locate(net, &p);
647	if (xi)
648		return -EEXIST;
649
650	xi = netdev_priv(dev);
651	xi->p = p;
652	xi->net = net;
653	xi->dev = dev;
654
655	err = xfrmi_create(dev);
656	return err;
657}
658
659static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
660{
661	unregister_netdevice_queue(dev, head);
662}
663
664static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
665			   struct nlattr *data[],
666			   struct netlink_ext_ack *extack)
667{
668	struct xfrm_if *xi = netdev_priv(dev);
669	struct net *net = xi->net;
670	struct xfrm_if_parms p;
671
672	xfrmi_netlink_parms(data, &p);
673	xi = xfrmi_locate(net, &p);
674	if (!xi) {
675		xi = netdev_priv(dev);
676	} else {
677		if (xi->dev != dev)
678			return -EEXIST;
679	}
680
681	return xfrmi_update(xi, &p);
682}
683
684static size_t xfrmi_get_size(const struct net_device *dev)
685{
686	return
687		/* IFLA_XFRM_LINK */
688		nla_total_size(4) +
689		/* IFLA_XFRM_IF_ID */
690		nla_total_size(4) +
691		0;
692}
693
694static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
695{
696	struct xfrm_if *xi = netdev_priv(dev);
697	struct xfrm_if_parms *parm = &xi->p;
698
699	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
700	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
701		goto nla_put_failure;
702	return 0;
703
704nla_put_failure:
705	return -EMSGSIZE;
706}
707
708static struct net *xfrmi_get_link_net(const struct net_device *dev)
709{
710	struct xfrm_if *xi = netdev_priv(dev);
711
712	return xi->net;
713}
714
715static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
716	[IFLA_XFRM_LINK]	= { .type = NLA_U32 },
717	[IFLA_XFRM_IF_ID]	= { .type = NLA_U32 },
718};
719
720static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
721	.kind		= "xfrm",
722	.maxtype	= IFLA_XFRM_MAX,
723	.policy		= xfrmi_policy,
724	.priv_size	= sizeof(struct xfrm_if),
725	.setup		= xfrmi_dev_setup,
726	.validate	= xfrmi_validate,
727	.newlink	= xfrmi_newlink,
728	.dellink	= xfrmi_dellink,
729	.changelink	= xfrmi_changelink,
730	.get_size	= xfrmi_get_size,
731	.fill_info	= xfrmi_fill_info,
732	.get_link_net	= xfrmi_get_link_net,
733};
734
735static void __net_exit xfrmi_destroy_interfaces(struct xfrmi_net *xfrmn)
736{
737	struct xfrm_if *xi;
738	LIST_HEAD(list);
739
740	xi = rtnl_dereference(xfrmn->xfrmi[0]);
741	if (!xi)
742		return;
743
744	unregister_netdevice_queue(xi->dev, &list);
745	unregister_netdevice_many(&list);
746}
747
748static void __net_exit xfrmi_exit_net(struct net *net)
749{
750	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
751
752	rtnl_lock();
753	xfrmi_destroy_interfaces(xfrmn);
 
 
 
 
 
 
 
 
 
 
 
 
 
754	rtnl_unlock();
755}
756
757static struct pernet_operations xfrmi_net_ops = {
758	.exit = xfrmi_exit_net,
759	.id   = &xfrmi_net_id,
760	.size = sizeof(struct xfrmi_net),
761};
762
763static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
764	.handler	=	xfrm6_rcv,
 
765	.cb_handler	=	xfrmi_rcv_cb,
766	.err_handler	=	xfrmi6_err,
767	.priority	=	10,
768};
769
770static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
771	.handler	=	xfrm6_rcv,
 
772	.cb_handler	=	xfrmi_rcv_cb,
773	.err_handler	=	xfrmi6_err,
774	.priority	=	10,
775};
776
777static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
778	.handler	=	xfrm6_rcv,
 
779	.cb_handler	=	xfrmi_rcv_cb,
780	.err_handler	=	xfrmi6_err,
781	.priority	=	10,
782};
783
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
785	.handler	=	xfrm4_rcv,
786	.input_handler	=	xfrm_input,
787	.cb_handler	=	xfrmi_rcv_cb,
788	.err_handler	=	xfrmi4_err,
789	.priority	=	10,
790};
791
792static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
793	.handler	=	xfrm4_rcv,
794	.input_handler	=	xfrm_input,
795	.cb_handler	=	xfrmi_rcv_cb,
796	.err_handler	=	xfrmi4_err,
797	.priority	=	10,
798};
799
800static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
801	.handler	=	xfrm4_rcv,
802	.input_handler	=	xfrm_input,
803	.cb_handler	=	xfrmi_rcv_cb,
804	.err_handler	=	xfrmi4_err,
805	.priority	=	10,
806};
807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808static int __init xfrmi4_init(void)
809{
810	int err;
811
812	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
813	if (err < 0)
814		goto xfrm_proto_esp_failed;
815	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
816	if (err < 0)
817		goto xfrm_proto_ah_failed;
818	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
819	if (err < 0)
820		goto xfrm_proto_comp_failed;
 
 
 
 
 
 
 
 
821
822	return 0;
823
 
 
 
 
 
 
824xfrm_proto_comp_failed:
825	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
826xfrm_proto_ah_failed:
827	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
828xfrm_proto_esp_failed:
829	return err;
830}
831
832static void xfrmi4_fini(void)
833{
 
 
 
 
834	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
835	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
836	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
837}
838
839static int __init xfrmi6_init(void)
840{
841	int err;
842
843	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
844	if (err < 0)
845		goto xfrm_proto_esp_failed;
846	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
847	if (err < 0)
848		goto xfrm_proto_ah_failed;
849	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
850	if (err < 0)
851		goto xfrm_proto_comp_failed;
 
 
 
 
 
 
 
 
852
853	return 0;
854
 
 
 
 
 
 
855xfrm_proto_comp_failed:
856	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
857xfrm_proto_ah_failed:
858	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
859xfrm_proto_esp_failed:
860	return err;
861}
862
863static void xfrmi6_fini(void)
864{
 
 
 
 
865	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
866	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
867	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
868}
869
870static const struct xfrm_if_cb xfrm_if_cb = {
871	.decode_session =	xfrmi_decode_session,
872};
873
874static int __init xfrmi_init(void)
875{
876	const char *msg;
877	int err;
878
879	pr_info("IPsec XFRM device driver\n");
880
881	msg = "tunnel device";
882	err = register_pernet_device(&xfrmi_net_ops);
883	if (err < 0)
884		goto pernet_dev_failed;
885
886	msg = "xfrm4 protocols";
887	err = xfrmi4_init();
888	if (err < 0)
889		goto xfrmi4_failed;
890
891	msg = "xfrm6 protocols";
892	err = xfrmi6_init();
893	if (err < 0)
894		goto xfrmi6_failed;
895
896
897	msg = "netlink interface";
898	err = rtnl_link_register(&xfrmi_link_ops);
899	if (err < 0)
900		goto rtnl_link_failed;
901
902	xfrm_if_register_cb(&xfrm_if_cb);
903
904	return err;
905
906rtnl_link_failed:
907	xfrmi6_fini();
908xfrmi6_failed:
909	xfrmi4_fini();
910xfrmi4_failed:
911	unregister_pernet_device(&xfrmi_net_ops);
912pernet_dev_failed:
913	pr_err("xfrmi init: failed to register %s\n", msg);
914	return err;
915}
916
917static void __exit xfrmi_fini(void)
918{
919	xfrm_if_unregister_cb();
920	rtnl_link_unregister(&xfrmi_link_ops);
921	xfrmi4_fini();
922	xfrmi6_fini();
923	unregister_pernet_device(&xfrmi_net_ops);
924}
925
926module_init(xfrmi_init);
927module_exit(xfrmi_fini);
928MODULE_LICENSE("GPL");
929MODULE_ALIAS_RTNL_LINK("xfrm");
930MODULE_ALIAS_NETDEV("xfrm0");
931MODULE_AUTHOR("Steffen Klassert");
932MODULE_DESCRIPTION("XFRM virtual interface");
Second copy below: the same file (net/xfrm/xfrm_interface.c) as of Linux v5.9 — note the hash-table lookup, device-based decode_session, and icmp_ndo_send fixes relative to the v5.4 copy above. The listing is truncated after xfrmi_xmit.
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *	XFRM virtual interface
   4 *
   5 *	Copyright (C) 2018 secunet Security Networks AG
   6 *
   7 *	Author:
   8 *	Steffen Klassert <steffen.klassert@secunet.com>
   9 */
  10
  11#include <linux/module.h>
  12#include <linux/capability.h>
  13#include <linux/errno.h>
  14#include <linux/types.h>
  15#include <linux/sockios.h>
  16#include <linux/icmp.h>
  17#include <linux/if.h>
  18#include <linux/in.h>
  19#include <linux/ip.h>
  20#include <linux/net.h>
  21#include <linux/in6.h>
  22#include <linux/netdevice.h>
  23#include <linux/if_link.h>
  24#include <linux/if_arp.h>
  25#include <linux/icmpv6.h>
  26#include <linux/init.h>
  27#include <linux/route.h>
  28#include <linux/rtnetlink.h>
  29#include <linux/netfilter_ipv6.h>
  30#include <linux/slab.h>
  31#include <linux/hash.h>
  32
  33#include <linux/uaccess.h>
  34#include <linux/atomic.h>
  35
  36#include <net/icmp.h>
  37#include <net/ip.h>
  38#include <net/ipv6.h>
  39#include <net/ip6_route.h>
  40#include <net/ip_tunnels.h>
  41#include <net/addrconf.h>
  42#include <net/xfrm.h>
  43#include <net/net_namespace.h>
  44#include <net/netns/generic.h>
  45#include <linux/etherdevice.h>
  46
  47static int xfrmi_dev_init(struct net_device *dev);
  48static void xfrmi_dev_setup(struct net_device *dev);
  49static struct rtnl_link_ops xfrmi_link_ops __read_mostly;
  50static unsigned int xfrmi_net_id __read_mostly;
  51static const struct net_device_ops xfrmi_netdev_ops;
  52
  53#define XFRMI_HASH_BITS	8
  54#define XFRMI_HASH_SIZE	BIT(XFRMI_HASH_BITS)
  55
  56struct xfrmi_net {
  57	/* lists for storing interfaces in use */
  58	struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE];
  59};
  60
  61#define for_each_xfrmi_rcu(start, xi) \
  62	for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next))
  63
  64static u32 xfrmi_hash(u32 if_id)
  65{
  66	return hash_32(if_id, XFRMI_HASH_BITS);
  67}
  68
  69static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x)
  70{
  71	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
  72	struct xfrm_if *xi;
  73
  74	for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) {
  75		if (x->if_id == xi->p.if_id &&
  76		    (xi->dev->flags & IFF_UP))
  77			return xi;
  78	}
  79
  80	return NULL;
  81}
  82
  83static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
  84					    unsigned short family)
  85{
  86	struct net_device *dev;
 
  87	int ifindex = 0;
  88
  89	if (!secpath_exists(skb) || !skb->dev)
  90		return NULL;
  91
  92	switch (family) {
  93	case AF_INET6:
  94		ifindex = inet6_sdif(skb);
  95		break;
  96	case AF_INET:
  97		ifindex = inet_sdif(skb);
  98		break;
  99	}
 
 
 100
 101	if (ifindex) {
 102		struct net *net = xs_net(xfrm_input_state(skb));
 103
 104		dev = dev_get_by_index_rcu(net, ifindex);
 105	} else {
 106		dev = skb->dev;
 
 107	}
 108
 109	if (!dev || !(dev->flags & IFF_UP))
 110		return NULL;
 111	if (dev->netdev_ops != &xfrmi_netdev_ops)
 112		return NULL;
 113
 114	return netdev_priv(dev);
 115}
 116
 117static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 118{
 119	struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 120
 121	rcu_assign_pointer(xi->next , rtnl_dereference(*xip));
 122	rcu_assign_pointer(*xip, xi);
 123}
 124
 125static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi)
 126{
 127	struct xfrm_if __rcu **xip;
 128	struct xfrm_if *iter;
 129
 130	for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)];
 131	     (iter = rtnl_dereference(*xip)) != NULL;
 132	     xip = &iter->next) {
 133		if (xi == iter) {
 134			rcu_assign_pointer(*xip, xi->next);
 135			break;
 136		}
 137	}
 138}
 139
 140static void xfrmi_dev_free(struct net_device *dev)
 141{
 142	struct xfrm_if *xi = netdev_priv(dev);
 143
 144	gro_cells_destroy(&xi->gro_cells);
 145	free_percpu(dev->tstats);
 146}
 147
 148static int xfrmi_create(struct net_device *dev)
 149{
 150	struct xfrm_if *xi = netdev_priv(dev);
 151	struct net *net = dev_net(dev);
 152	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 153	int err;
 154
 155	dev->rtnl_link_ops = &xfrmi_link_ops;
 156	err = register_netdevice(dev);
 157	if (err < 0)
 158		goto out;
 159
 
 160	xfrmi_link(xfrmn, xi);
 161
 162	return 0;
 163
 164out:
 165	return err;
 166}
 167
 168static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p)
 169{
 170	struct xfrm_if __rcu **xip;
 171	struct xfrm_if *xi;
 172	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 173
 174	for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)];
 175	     (xi = rtnl_dereference(*xip)) != NULL;
 176	     xip = &xi->next)
 177		if (xi->p.if_id == p->if_id)
 178			return xi;
 179
 180	return NULL;
 181}
 182
 183static void xfrmi_dev_uninit(struct net_device *dev)
 184{
 185	struct xfrm_if *xi = netdev_priv(dev);
 186	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 187
 188	xfrmi_unlink(xfrmn, xi);
 
 189}
 190
 191static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
 192{
 193	skb->tstamp = 0;
 194	skb->pkt_type = PACKET_HOST;
 195	skb->skb_iif = 0;
 196	skb->ignore_df = 0;
 197	skb_dst_drop(skb);
 198	nf_reset_ct(skb);
 199	nf_reset_trace(skb);
 200
 201	if (!xnet)
 202		return;
 203
 204	ipvs_reset(skb);
 205	secpath_reset(skb);
 206	skb_orphan(skb);
 207	skb->mark = 0;
 208}
 209
 210static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
 211{
 212	const struct xfrm_mode *inner_mode;
 213	struct pcpu_sw_netstats *tstats;
 214	struct net_device *dev;
 215	struct xfrm_state *x;
 216	struct xfrm_if *xi;
 217	bool xnet;
 218
 219	if (err && !secpath_exists(skb))
 220		return 0;
 221
 222	x = xfrm_input_state(skb);
 223
 224	xi = xfrmi_lookup(xs_net(x), x);
 225	if (!xi)
 226		return 1;
 227
 228	dev = xi->dev;
 229	skb->dev = dev;
 230
 231	if (err) {
 232		dev->stats.rx_errors++;
 233		dev->stats.rx_dropped++;
 234
 235		return 0;
 236	}
 237
 238	xnet = !net_eq(xi->net, dev_net(skb->dev));
 239
 240	if (xnet) {
 241		inner_mode = &x->inner_mode;
 242
 243		if (x->sel.family == AF_UNSPEC) {
 244			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
 245			if (inner_mode == NULL) {
 246				XFRM_INC_STATS(dev_net(skb->dev),
 247					       LINUX_MIB_XFRMINSTATEMODEERROR);
 248				return -EINVAL;
 249			}
 250		}
 251
 252		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
 253				       inner_mode->family))
 254			return -EPERM;
 255	}
 256
 257	xfrmi_scrub_packet(skb, xnet);
 258
 259	tstats = this_cpu_ptr(dev->tstats);
 260
 261	u64_stats_update_begin(&tstats->syncp);
 262	tstats->rx_packets++;
 263	tstats->rx_bytes += skb->len;
 264	u64_stats_update_end(&tstats->syncp);
 265
 266	return 0;
 267}
 268
/* Transmit one skb through the xfrm interface: route it through the
 * xfrm policy/state database bound to this interface's if_id and hand
 * it to the underlying device.
 *
 * Returns 0 on success (including soft tx errors counted in stats),
 * -EMSGSIZE when the packet exceeds the path MTU (ICMP is sent back),
 * or a negative error on lookup/link failures.
 */
static int
xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct net_device_stats *stats = &xi->dev->stats;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int length = skb->len;	/* cache: dst_output may free skb */
	struct net_device *tdev;
	struct xfrm_state *x;
	int err = -1;
	int mtu;

	/* xfrm_lookup_with_ifid() consumes a dst reference; take one so
	 * the reference still attached to the skb is not lost.
	 */
	dst_hold(dst);
	dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		dst = NULL;	/* nothing to release on the error path */
		goto tx_err_link_failure;
	}

	/* Only transmit if the lookup resolved to an xfrm state that is
	 * actually bound to this interface's if_id.
	 */
	x = dst->xfrm;
	if (!x)
		goto tx_err_link_failure;

	if (x->if_id != xi->p.if_id)
		goto tx_err_link_failure;

	tdev = dst->dev;

	/* Refuse to loop packets back into ourselves. */
	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     dev->name);
		goto tx_err_dst_release;
	}

	/* Path MTU handling: update the cached PMTU and notify the
	 * sender with the family-appropriate ICMP error.
	 */
	mtu = dst_mtu(dst);
	if (skb->len > mtu) {
		skb_dst_update_pmtu_no_confirm(skb, mtu);

		if (skb->protocol == htons(ETH_P_IPV6)) {
			if (mtu < IPV6_MIN_MTU)
				mtu = IPV6_MIN_MTU;

			icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		} else {
			icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
				      htonl(mtu));
		}

		dst_release(dst);
		return -EMSGSIZE;
	}

	/* Scrub skb state when crossing a netns boundary, then switch
	 * the packet over to the transformed route/device.
	 */
	xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
	skb_dst_set(skb, dst);
	skb->dev = tdev;

	err = dst_output(xi->net, skb->sk, skb);
	if (net_xmit_eval(err) == 0) {
		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);

		u64_stats_update_begin(&tstats->syncp);
		tstats->tx_bytes += length;
		tstats->tx_packets++;
		u64_stats_update_end(&tstats->syncp);
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}

	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
 348
/* ndo_start_xmit: decode the packet into a flow, make sure it carries a
 * route (looking one up on the xfrm device itself if not), then pass it
 * to xfrmi_xmit2() for the actual xfrm lookup and transmission.
 */
static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct net_device_stats *stats = &xi->dev->stats;
	struct dst_entry *dst = skb_dst(skb);
	struct flowi fl;
	int ret;

	memset(&fl, 0, sizeof(fl));

	switch (skb->protocol) {
	case htons(ETH_P_IPV6):
		/* NOTE(review): xfrm_decode_session()'s return value is
		 * not checked here — confirm fl is fully usable for
		 * malformed packets.
		 */
		xfrm_decode_session(skb, &fl, AF_INET6);
		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
		if (!dst) {
			/* No route attached: resolve one via this device. */
			fl.u.ip6.flowi6_oif = dev->ifindex;
			fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
			dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
			if (dst->error) {
				dst_release(dst);
				stats->tx_carrier_errors++;
				goto tx_err;
			}
			skb_dst_set(skb, dst);
		}
		break;
	case htons(ETH_P_IP):
		xfrm_decode_session(skb, &fl, AF_INET);
		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
		if (!dst) {
			struct rtable *rt;

			fl.u.ip4.flowi4_oif = dev->ifindex;
			fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
			rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
			if (IS_ERR(rt)) {
				stats->tx_carrier_errors++;
				goto tx_err;
			}
			skb_dst_set(skb, &rt->dst);
		}
		break;
	default:
		/* Only IPv4/IPv6 payloads are supported. */
		goto tx_err;
	}

	/* Force the policy lookup to use the configured lower device. */
	fl.flowi_oif = xi->p.link;

	ret = xfrmi_xmit2(skb, dev, &fl);
	if (ret < 0)
		goto tx_err;

	return NETDEV_TX_OK;

tx_err:
	stats->tx_errors++;
	stats->tx_dropped++;
	kfree_skb(skb);
	return NETDEV_TX_OK;
}
 409
/* ICMP error handler for IPv4 ESP/AH/IPCOMP: if the error refers to a
 * state bound to an xfrm interface, propagate PMTU updates or redirects
 * to the routing layer.  Returns 0 when handled or not ours, -1 when
 * the state exists but no matching interface is up.
 */
static int xfrmi4_err(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	int protocol = iph->protocol;
	struct ip_comp_hdr *ipch;
	struct ip_esp_hdr *esph;
	struct ip_auth_hdr *ah ;
	struct xfrm_state *x;
	struct xfrm_if *xi;
	__be32 spi;

	/* Extract the SPI from the inner IPsec header so the offending
	 * state can be looked up.
	 */
	switch (protocol) {
	case IPPROTO_ESP:
		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
		spi = esph->spi;
		break;
	case IPPROTO_AH:
		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
		spi = ah->spi;
		break;
	case IPPROTO_COMP:
		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
		/* IPCOMP uses a 16-bit CPI in place of a full SPI. */
		spi = htonl(ntohs(ipch->cpi));
		break;
	default:
		return 0;
	}

	/* Only fragmentation-needed and redirect errors are acted on. */
	switch (icmp_hdr(skb)->type) {
	case ICMP_DEST_UNREACH:
		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
			return 0;
		/* fall through - FRAG_NEEDED is handled below */
	case ICMP_REDIRECT:
		break;
	default:
		return 0;
	}

	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
			      spi, protocol, AF_INET);
	if (!x)
		return 0;

	xi = xfrmi_lookup(net, x);
	if (!xi) {
		xfrm_state_put(x);
		return -1;
	}

	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
		ipv4_update_pmtu(skb, net, info, 0, protocol);
	else
		ipv4_redirect(skb, net, 0, protocol);
	xfrm_state_put(x);

	return 0;
}
 468
/* ICMPv6 error handler for IPv6 ESP/AH/IPCOMP: the IPv6 counterpart of
 * xfrmi4_err().  Propagates packet-too-big and redirect notifications
 * for states bound to an xfrm interface.  Returns 0 when handled or
 * not ours, -1 when the state exists but no matching interface is up.
 */
static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		    u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
	struct net *net = dev_net(skb->dev);
	int protocol = iph->nexthdr;
	struct ip_comp_hdr *ipch;
	struct ip_esp_hdr *esph;
	struct ip_auth_hdr *ah;
	struct xfrm_state *x;
	struct xfrm_if *xi;
	__be32 spi;

	/* Extract the SPI from the IPsec header at @offset. */
	switch (protocol) {
	case IPPROTO_ESP:
		esph = (struct ip_esp_hdr *)(skb->data + offset);
		spi = esph->spi;
		break;
	case IPPROTO_AH:
		ah = (struct ip_auth_hdr *)(skb->data + offset);
		spi = ah->spi;
		break;
	case IPPROTO_COMP:
		ipch = (struct ip_comp_hdr *)(skb->data + offset);
		/* IPCOMP uses a 16-bit CPI in place of a full SPI. */
		spi = htonl(ntohs(ipch->cpi));
		break;
	default:
		return 0;
	}

	/* Only PMTU and redirect notifications are acted on. */
	if (type != ICMPV6_PKT_TOOBIG &&
	    type != NDISC_REDIRECT)
		return 0;

	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,
			      spi, protocol, AF_INET6);
	if (!x)
		return 0;

	xi = xfrmi_lookup(net, x);
	if (!xi) {
		xfrm_state_put(x);
		return -1;
	}

	if (type == NDISC_REDIRECT)
		ip6_redirect(skb, net, skb->dev->ifindex, 0,
			     sock_net_uid(net, NULL));
	else
		ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
	xfrm_state_put(x);

	return 0;
}
 523
 524static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p)
 525{
 526	if (xi->p.link != p->link)
 527		return -EINVAL;
 528
 529	xi->p.if_id = p->if_id;
 530
 531	return 0;
 532}
 533
 534static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p)
 535{
 536	struct net *net = xi->net;
 537	struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
 538	int err;
 539
 540	xfrmi_unlink(xfrmn, xi);
 541	synchronize_net();
 542	err = xfrmi_change(xi, p);
 543	xfrmi_link(xfrmn, xi);
 544	netdev_state_change(xi->dev);
 545	return err;
 546}
 547
 548static void xfrmi_get_stats64(struct net_device *dev,
 549			       struct rtnl_link_stats64 *s)
 550{
 551	int cpu;
 552
 553	for_each_possible_cpu(cpu) {
 554		struct pcpu_sw_netstats *stats;
 555		struct pcpu_sw_netstats tmp;
 556		int start;
 557
 558		stats = per_cpu_ptr(dev->tstats, cpu);
 559		do {
 560			start = u64_stats_fetch_begin_irq(&stats->syncp);
 561			tmp.rx_packets = stats->rx_packets;
 562			tmp.rx_bytes   = stats->rx_bytes;
 563			tmp.tx_packets = stats->tx_packets;
 564			tmp.tx_bytes   = stats->tx_bytes;
 565		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
 566
 567		s->rx_packets += tmp.rx_packets;
 568		s->rx_bytes   += tmp.rx_bytes;
 569		s->tx_packets += tmp.tx_packets;
 570		s->tx_bytes   += tmp.tx_bytes;
 571	}
 572
 573	s->rx_dropped = dev->stats.rx_dropped;
 574	s->tx_dropped = dev->stats.tx_dropped;
 575}
 576
 577static int xfrmi_get_iflink(const struct net_device *dev)
 578{
 579	struct xfrm_if *xi = netdev_priv(dev);
 580
 581	return xi->p.link;
 582}
 583
 584
/* Network device operations for xfrm interfaces. */
static const struct net_device_ops xfrmi_netdev_ops = {
	.ndo_init	= xfrmi_dev_init,
	.ndo_uninit	= xfrmi_dev_uninit,
	.ndo_start_xmit = xfrmi_xmit,
	.ndo_get_stats64 = xfrmi_get_stats64,
	.ndo_get_iflink = xfrmi_get_iflink,
};
 592
/* rtnl_link_ops->setup: set defaults for a newly allocated xfrm
 * interface before ndo_init runs.
 */
static void xfrmi_dev_setup(struct net_device *dev)
{
	dev->netdev_ops 	= &xfrmi_netdev_ops;
	dev->header_ops		= &ip_tunnel_header_ops;
	dev->type		= ARPHRD_NONE;	/* point-to-point, no L2 header */
	dev->mtu		= ETH_DATA_LEN;
	dev->min_mtu		= ETH_MIN_MTU;
	dev->max_mtu		= IP_MAX_MTU;
	dev->flags 		= IFF_NOARP;
	dev->needs_free_netdev	= true;
	dev->priv_destructor	= xfrmi_dev_free;
	/* Keep the dst attached on xmit; xfrmi_xmit() consumes it. */
	netif_keep_dst(dev);

	eth_broadcast_addr(dev->broadcast);
}
 608
/* ndo_init: allocate per-cpu stats and GRO cells, and inherit headroom
 * plus link-layer addresses from the lower device when one exists.
 */
static int xfrmi_dev_init(struct net_device *dev)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
	int err;

	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&xi->gro_cells, dev);
	if (err) {
		/* Don't leak the per-cpu stats on failure. */
		free_percpu(dev->tstats);
		return err;
	}

	dev->features |= NETIF_F_LLTX;

	if (phydev) {
		dev->needed_headroom = phydev->needed_headroom;
		dev->needed_tailroom = phydev->needed_tailroom;

		/* Only inherit addresses that were not set explicitly. */
		if (is_zero_ether_addr(dev->dev_addr))
			eth_hw_addr_inherit(dev, phydev);
		if (is_zero_ether_addr(dev->broadcast))
			memcpy(dev->broadcast, phydev->broadcast,
			       dev->addr_len);
	} else {
		/* No lower device found: fall back to random/broadcast. */
		eth_hw_addr_random(dev);
		eth_broadcast_addr(dev->broadcast);
	}

	return 0;
}
 643
/* rtnl_link_ops->validate: the IFLA_XFRM_* attributes are fully covered
 * by xfrmi_policy, so no additional validation is required.
 */
static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	return 0;
}
 649
/* Extract IFLA_XFRM_LINK/IFLA_XFRM_IF_ID from the netlink attributes
 * into @parms; attributes that are absent are left zeroed.
 */
static void xfrmi_netlink_parms(struct nlattr *data[],
			       struct xfrm_if_parms *parms)
{
	memset(parms, 0, sizeof(*parms));

	if (!data)
		return;

	if (data[IFLA_XFRM_LINK])
		parms->link = nla_get_u32(data[IFLA_XFRM_LINK]);

	if (data[IFLA_XFRM_IF_ID])
		parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]);
}
 664
 665static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
 666			struct nlattr *tb[], struct nlattr *data[],
 667			struct netlink_ext_ack *extack)
 668{
 669	struct net *net = dev_net(dev);
 670	struct xfrm_if_parms p;
 671	struct xfrm_if *xi;
 672	int err;
 673
 674	xfrmi_netlink_parms(data, &p);
 675	xi = xfrmi_locate(net, &p);
 676	if (xi)
 677		return -EEXIST;
 678
 679	xi = netdev_priv(dev);
 680	xi->p = p;
 681	xi->net = net;
 682	xi->dev = dev;
 683
 684	err = xfrmi_create(dev);
 685	return err;
 686}
 687
/* rtnl_link_ops->dellink: queue the device for batched unregistration;
 * hash unlinking happens in ndo_uninit.
 */
static void xfrmi_dellink(struct net_device *dev, struct list_head *head)
{
	unregister_netdevice_queue(dev, head);
}
 692
/* rtnl_link_ops->changelink: apply new parameters to @dev.  If the
 * requested if_id is already in use by a different device, refuse with
 * -EEXIST; if it resolves back to @dev (or is unused), update in place.
 */
static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
			   struct nlattr *data[],
			   struct netlink_ext_ack *extack)
{
	struct xfrm_if *xi = netdev_priv(dev);
	struct net *net = xi->net;
	struct xfrm_if_parms p;

	xfrmi_netlink_parms(data, &p);
	xi = xfrmi_locate(net, &p);
	if (!xi) {
		/* No conflict: operate on this device's own private data. */
		xi = netdev_priv(dev);
	} else {
		if (xi->dev != dev)
			return -EEXIST;
	}

	return xfrmi_update(xi, &p);
}
 712
 713static size_t xfrmi_get_size(const struct net_device *dev)
 714{
 715	return
 716		/* IFLA_XFRM_LINK */
 717		nla_total_size(4) +
 718		/* IFLA_XFRM_IF_ID */
 719		nla_total_size(4) +
 720		0;
 721}
 722
 723static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev)
 724{
 725	struct xfrm_if *xi = netdev_priv(dev);
 726	struct xfrm_if_parms *parm = &xi->p;
 727
 728	if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) ||
 729	    nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id))
 730		goto nla_put_failure;
 731	return 0;
 732
 733nla_put_failure:
 734	return -EMSGSIZE;
 735}
 736
 737static struct net *xfrmi_get_link_net(const struct net_device *dev)
 738{
 739	struct xfrm_if *xi = netdev_priv(dev);
 740
 741	return xi->net;
 742}
 743
/* Netlink attribute policy: both IFLA_XFRM_* attributes are u32. */
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
	[IFLA_XFRM_LINK]	= { .type = NLA_U32 },
	[IFLA_XFRM_IF_ID]	= { .type = NLA_U32 },
};
 748
/* rtnetlink glue for "ip link add ... type xfrm". */
static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
	.kind		= "xfrm",
	.maxtype	= IFLA_XFRM_MAX,
	.policy		= xfrmi_policy,
	.priv_size	= sizeof(struct xfrm_if),
	.setup		= xfrmi_dev_setup,
	.validate	= xfrmi_validate,
	.newlink	= xfrmi_newlink,
	.dellink	= xfrmi_dellink,
	.changelink	= xfrmi_changelink,
	.get_size	= xfrmi_get_size,
	.fill_info	= xfrmi_fill_info,
	.get_link_net	= xfrmi_get_link_net,
};
 763
/* Network namespace teardown: unregister every xfrm interface remaining
 * in each dying netns, batched under a single rtnl lock and a single
 * unregister_netdevice_many() call.
 */
static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_exit_list, exit_list) {
		struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
		struct xfrm_if __rcu **xip;
		struct xfrm_if *xi;
		int i;

		/* Walk every hash bucket; rtnl_dereference() is valid
		 * because the rtnl lock is held.
		 */
		for (i = 0; i < XFRMI_HASH_SIZE; i++) {
			for (xip = &xfrmn->xfrmi[i];
			     (xi = rtnl_dereference(*xip)) != NULL;
			     xip = &xi->next)
				unregister_netdevice_queue(xi->dev, &list);
		}
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
 786
/* Per-netns registration: allocates a struct xfrmi_net per namespace
 * (keyed by xfrmi_net_id) and tears devices down via exit_batch.
 */
static struct pernet_operations xfrmi_net_ops = {
	.exit_batch = xfrmi_exit_batch_net,
	.id   = &xfrmi_net_id,
	.size = sizeof(struct xfrmi_net),
};
 792
/* IPv6 ESP/AH/IPCOMP protocol handlers.  All share the generic xfrm
 * receive path; xfrmi_rcv_cb/xfrmi6_err hook interface-specific
 * processing and ICMPv6 error handling.
 */
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
	.handler	=	xfrm6_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi6_err,
	.priority	=	10,
};

static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
	.handler	=	xfrm6_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi6_err,
	.priority	=	10,
};

static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
	.handler	=	xfrm6_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi6_err,
	.priority	=	10,
};
 816
#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
/* Tunnel-mode receive over IPv6: resolve the SPI allocated for the
 * outer source address and hand the packet to xfrm6_rcv_spi().
 */
static int xfrmi6_rcv_tunnel(struct sk_buff *skb)
{
	const xfrm_address_t *saddr;
	__be32 spi;

	saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr;
	spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr);

	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
}

/* Negative priority: run after the regular xfrm6 tunnel handlers. */
static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = {
	.handler	=	xfrmi6_rcv_tunnel,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi6_err,
	.priority	=	-1,
};

static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
	.handler	=	xfrmi6_rcv_tunnel,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi6_err,
	.priority	=	-1,
};
#endif
 843
/* IPv4 ESP/AH/IPCOMP protocol handlers, mirroring the IPv6 set above. */
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
	.handler	=	xfrm4_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi4_err,
	.priority	=	10,
};

static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = {
	.handler	=	xfrm4_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi4_err,
	.priority	=	10,
};

static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = {
	.handler	=	xfrm4_rcv,
	.input_handler	=	xfrm_input,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi4_err,
	.priority	=	10,
};
 867
#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
/* Tunnel-mode receive over IPv4: the outer source address doubles as
 * the lookup key passed in the SPI slot of xfrm4_rcv_spi().
 */
static int xfrmi4_rcv_tunnel(struct sk_buff *skb)
{
	return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr);
}

/* Negative priority: run after the regular xfrm4 tunnel handlers. */
static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = {
	.handler	=	xfrmi4_rcv_tunnel,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi4_err,
	.priority	=	-1,
};

static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = {
	.handler	=	xfrmi4_rcv_tunnel,
	.cb_handler	=	xfrmi_rcv_cb,
	.err_handler	=	xfrmi4_err,
	.priority	=	-1,
};
#endif
 888
/* Register all IPv4 handlers (ESP, AH, IPCOMP, and optionally the
 * tunnel handlers).  On failure, the goto chain deregisters everything
 * registered so far, in reverse order.
 */
static int __init xfrmi4_init(void)
{
	int err;

	err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP);
	if (err < 0)
		goto xfrm_proto_esp_failed;
	err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH);
	if (err < 0)
		goto xfrm_proto_ah_failed;
	err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
	if (err < 0)
		goto xfrm_proto_comp_failed;
#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
	err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET);
	if (err < 0)
		goto xfrm_tunnel_ipip_failed;
	err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6);
	if (err < 0)
		goto xfrm_tunnel_ipip6_failed;
#endif

	return 0;

	/* Error unwinding: each label undoes the steps before it. */
#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
xfrm_tunnel_ipip6_failed:
	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
xfrm_tunnel_ipip_failed:
	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
#endif
xfrm_proto_comp_failed:
	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
xfrm_proto_esp_failed:
	return err;
}
 926
/* Deregister all IPv4 handlers in reverse registration order. */
static void xfrmi4_fini(void)
{
#if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL)
	xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6);
	xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET);
#endif
	xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP);
	xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH);
	xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP);
}
 937
/* Register all IPv6 handlers (ESP, AH, IPCOMP, and optionally the
 * tunnel handlers), mirroring xfrmi4_init() including its reverse-order
 * error unwinding.
 */
static int __init xfrmi6_init(void)
{
	int err;

	err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP);
	if (err < 0)
		goto xfrm_proto_esp_failed;
	err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH);
	if (err < 0)
		goto xfrm_proto_ah_failed;
	err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
	if (err < 0)
		goto xfrm_proto_comp_failed;
#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
	err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6);
	if (err < 0)
		goto xfrm_tunnel_ipv6_failed;
	err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET);
	if (err < 0)
		goto xfrm_tunnel_ip6ip_failed;
#endif

	return 0;

	/* Error unwinding: each label undoes the steps before it. */
#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
xfrm_tunnel_ip6ip_failed:
	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
xfrm_tunnel_ipv6_failed:
	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
#endif
xfrm_proto_comp_failed:
	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
xfrm_proto_esp_failed:
	return err;
}
 975
/* Deregister all IPv6 handlers in reverse registration order. */
static void xfrmi6_fini(void)
{
#if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL)
	xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET);
	xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6);
#endif
	xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP);
	xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH);
	xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP);
}
 986
/* Callback set registered with the xfrm core so policy lookups can be
 * routed through xfrmi_decode_session().
 */
static const struct xfrm_if_cb xfrm_if_cb = {
	.decode_session =	xfrmi_decode_session,
};
 990
/* Module init: register the pernet state, the IPv4/IPv6 protocol
 * handlers, the rtnl link ops, and finally the decode_session callback.
 * On failure, unwind in reverse order and report the failing stage.
 */
static int __init xfrmi_init(void)
{
	const char *msg;	/* names the stage for the failure message */
	int err;

	pr_info("IPsec XFRM device driver\n");

	msg = "tunnel device";
	err = register_pernet_device(&xfrmi_net_ops);
	if (err < 0)
		goto pernet_dev_failed;

	msg = "xfrm4 protocols";
	err = xfrmi4_init();
	if (err < 0)
		goto xfrmi4_failed;

	msg = "xfrm6 protocols";
	err = xfrmi6_init();
	if (err < 0)
		goto xfrmi6_failed;

	msg = "netlink interface";
	err = rtnl_link_register(&xfrmi_link_ops);
	if (err < 0)
		goto rtnl_link_failed;

	xfrm_if_register_cb(&xfrm_if_cb);

	return err;

rtnl_link_failed:
	xfrmi6_fini();
xfrmi6_failed:
	xfrmi4_fini();
xfrmi4_failed:
	unregister_pernet_device(&xfrmi_net_ops);
pernet_dev_failed:
	pr_err("xfrmi init: failed to register %s\n", msg);
	return err;
}
1033
/* Module exit: tear everything down in reverse of xfrmi_init(). */
static void __exit xfrmi_fini(void)
{
	xfrm_if_unregister_cb();
	rtnl_link_unregister(&xfrmi_link_ops);
	xfrmi4_fini();
	xfrmi6_fini();
	unregister_pernet_device(&xfrmi_net_ops);
}
1042
module_init(xfrmi_init);
module_exit(xfrmi_fini);
MODULE_LICENSE("GPL");
/* Auto-load on "ip link add ... type xfrm" and on the xfrm0 netdev name. */
MODULE_ALIAS_RTNL_LINK("xfrm");
MODULE_ALIAS_NETDEV("xfrm0");
MODULE_AUTHOR("Steffen Klassert");
MODULE_DESCRIPTION("XFRM virtual interface");