Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
   1/*
   2 * xfrm_state.c
   3 *
   4 * Changes:
   5 *	Mitsuru KANDA @USAGI
   6 * 	Kazunori MIYAZAWA @USAGI
   7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   8 * 		IPv6 support
   9 * 	YOSHIFUJI Hideaki @USAGI
  10 * 		Split up af-specific functions
  11 *	Derek Atkins <derek@ihtfp.com>
  12 *		Add UDP Encapsulation
  13 *
  14 */
  15
  16#include <linux/workqueue.h>
  17#include <net/xfrm.h>
  18#include <linux/pfkeyv2.h>
  19#include <linux/ipsec.h>
  20#include <linux/module.h>
  21#include <linux/cache.h>
  22#include <linux/audit.h>
  23#include <asm/uaccess.h>
  24#include <linux/ktime.h>
  25#include <linux/slab.h>
  26#include <linux/interrupt.h>
  27#include <linux/kernel.h>
  28
  29#include "xfrm_hash.h"
  30
  31/* Each xfrm_state may be linked to two tables:
  32
  33   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
  34   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
  35      destination/tunnel endpoint. (output)
  36 */
  37
/* Ceiling on the bucket count of the state hash tables: xfrm_hash_grow_check()
 * only schedules a resize while (state_hmask + 1) is below this value, and
 * xfrm_sad_getinfo() reports it as sadhmcnt.
 */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
  39
  40static inline unsigned int xfrm_dst_hash(struct net *net,
  41					 const xfrm_address_t *daddr,
  42					 const xfrm_address_t *saddr,
  43					 u32 reqid,
  44					 unsigned short family)
  45{
  46	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
  47}
  48
  49static inline unsigned int xfrm_src_hash(struct net *net,
  50					 const xfrm_address_t *daddr,
  51					 const xfrm_address_t *saddr,
  52					 unsigned short family)
  53{
  54	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
  55}
  56
  57static inline unsigned int
  58xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
  59	      __be32 spi, u8 proto, unsigned short family)
  60{
  61	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
  62}
  63
  64static void xfrm_hash_transfer(struct hlist_head *list,
  65			       struct hlist_head *ndsttable,
  66			       struct hlist_head *nsrctable,
  67			       struct hlist_head *nspitable,
  68			       unsigned int nhashmask)
  69{
  70	struct hlist_node *tmp;
  71	struct xfrm_state *x;
  72
  73	hlist_for_each_entry_safe(x, tmp, list, bydst) {
  74		unsigned int h;
  75
  76		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
  77				    x->props.reqid, x->props.family,
  78				    nhashmask);
  79		hlist_add_head(&x->bydst, ndsttable+h);
  80
  81		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
  82				    x->props.family,
  83				    nhashmask);
  84		hlist_add_head(&x->bysrc, nsrctable+h);
  85
  86		if (x->id.spi) {
  87			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
  88					    x->id.proto, x->props.family,
  89					    nhashmask);
  90			hlist_add_head(&x->byspi, nspitable+h);
  91		}
  92	}
  93}
  94
  95static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
  96{
  97	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
  98}
  99
 100static DEFINE_MUTEX(hash_resize_mutex);
 101
 102static void xfrm_hash_resize(struct work_struct *work)
 103{
 104	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
 105	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
 106	unsigned long nsize, osize;
 107	unsigned int nhashmask, ohashmask;
 108	int i;
 109
 110	mutex_lock(&hash_resize_mutex);
 111
 112	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
 113	ndst = xfrm_hash_alloc(nsize);
 114	if (!ndst)
 115		goto out_unlock;
 116	nsrc = xfrm_hash_alloc(nsize);
 117	if (!nsrc) {
 118		xfrm_hash_free(ndst, nsize);
 119		goto out_unlock;
 120	}
 121	nspi = xfrm_hash_alloc(nsize);
 122	if (!nspi) {
 123		xfrm_hash_free(ndst, nsize);
 124		xfrm_hash_free(nsrc, nsize);
 125		goto out_unlock;
 126	}
 127
 128	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 129
 130	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 131	for (i = net->xfrm.state_hmask; i >= 0; i--)
 132		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
 133				   nhashmask);
 134
 135	odst = net->xfrm.state_bydst;
 136	osrc = net->xfrm.state_bysrc;
 137	ospi = net->xfrm.state_byspi;
 138	ohashmask = net->xfrm.state_hmask;
 139
 140	net->xfrm.state_bydst = ndst;
 141	net->xfrm.state_bysrc = nsrc;
 142	net->xfrm.state_byspi = nspi;
 143	net->xfrm.state_hmask = nhashmask;
 144
 145	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 146
 147	osize = (ohashmask + 1) * sizeof(struct hlist_head);
 148	xfrm_hash_free(odst, osize);
 149	xfrm_hash_free(osrc, osize);
 150	xfrm_hash_free(ospi, osize);
 151
 152out_unlock:
 153	mutex_unlock(&hash_resize_mutex);
 154}
 155
/* NOTE(review): neither of these two is referenced in the visible part of the
 * file; presumably they hold, and serialize updates to, the per-family
 * xfrm_state_afinfo registrations returned by xfrm_state_get_afinfo() --
 * confirm against that function's definition.
 */
static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];

/* Protects net->xfrm.state_gc_list, the list of states awaiting destruction. */
static DEFINE_SPINLOCK(xfrm_state_gc_lock);

/* Forward declaration: xfrm_timer_handler() calls this before its definition. */
int __xfrm_state_delete(struct xfrm_state *x);

/* Key-manager entry points used below; their definitions are not in the
 * visible part of this file.
 */
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 166
 167static DEFINE_SPINLOCK(xfrm_type_lock);
 168int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 169{
 170	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 171	const struct xfrm_type **typemap;
 172	int err = 0;
 173
 174	if (unlikely(afinfo == NULL))
 175		return -EAFNOSUPPORT;
 176	typemap = afinfo->type_map;
 177	spin_lock_bh(&xfrm_type_lock);
 178
 179	if (likely(typemap[type->proto] == NULL))
 180		typemap[type->proto] = type;
 181	else
 182		err = -EEXIST;
 183	spin_unlock_bh(&xfrm_type_lock);
 184	xfrm_state_put_afinfo(afinfo);
 185	return err;
 186}
 187EXPORT_SYMBOL(xfrm_register_type);
 188
 189int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 190{
 191	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 192	const struct xfrm_type **typemap;
 193	int err = 0;
 194
 195	if (unlikely(afinfo == NULL))
 196		return -EAFNOSUPPORT;
 197	typemap = afinfo->type_map;
 198	spin_lock_bh(&xfrm_type_lock);
 199
 200	if (unlikely(typemap[type->proto] != type))
 201		err = -ENOENT;
 202	else
 203		typemap[type->proto] = NULL;
 204	spin_unlock_bh(&xfrm_type_lock);
 205	xfrm_state_put_afinfo(afinfo);
 206	return err;
 207}
 208EXPORT_SYMBOL(xfrm_unregister_type);
 209
 210static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 211{
 212	struct xfrm_state_afinfo *afinfo;
 213	const struct xfrm_type **typemap;
 214	const struct xfrm_type *type;
 215	int modload_attempted = 0;
 216
 217retry:
 218	afinfo = xfrm_state_get_afinfo(family);
 219	if (unlikely(afinfo == NULL))
 220		return NULL;
 221	typemap = afinfo->type_map;
 222
 223	type = typemap[proto];
 224	if (unlikely(type && !try_module_get(type->owner)))
 225		type = NULL;
 226	if (!type && !modload_attempted) {
 227		xfrm_state_put_afinfo(afinfo);
 228		request_module("xfrm-type-%d-%d", family, proto);
 229		modload_attempted = 1;
 230		goto retry;
 231	}
 232
 233	xfrm_state_put_afinfo(afinfo);
 234	return type;
 235}
 236
 237static void xfrm_put_type(const struct xfrm_type *type)
 238{
 239	module_put(type->owner);
 240}
 241
 242static DEFINE_SPINLOCK(xfrm_mode_lock);
 243int xfrm_register_mode(struct xfrm_mode *mode, int family)
 244{
 245	struct xfrm_state_afinfo *afinfo;
 246	struct xfrm_mode **modemap;
 247	int err;
 248
 249	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 250		return -EINVAL;
 251
 252	afinfo = xfrm_state_get_afinfo(family);
 253	if (unlikely(afinfo == NULL))
 254		return -EAFNOSUPPORT;
 255
 256	err = -EEXIST;
 257	modemap = afinfo->mode_map;
 258	spin_lock_bh(&xfrm_mode_lock);
 259	if (modemap[mode->encap])
 260		goto out;
 261
 262	err = -ENOENT;
 263	if (!try_module_get(afinfo->owner))
 264		goto out;
 265
 266	mode->afinfo = afinfo;
 267	modemap[mode->encap] = mode;
 268	err = 0;
 269
 270out:
 271	spin_unlock_bh(&xfrm_mode_lock);
 272	xfrm_state_put_afinfo(afinfo);
 273	return err;
 274}
 275EXPORT_SYMBOL(xfrm_register_mode);
 276
 277int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 278{
 279	struct xfrm_state_afinfo *afinfo;
 280	struct xfrm_mode **modemap;
 281	int err;
 282
 283	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 284		return -EINVAL;
 285
 286	afinfo = xfrm_state_get_afinfo(family);
 287	if (unlikely(afinfo == NULL))
 288		return -EAFNOSUPPORT;
 289
 290	err = -ENOENT;
 291	modemap = afinfo->mode_map;
 292	spin_lock_bh(&xfrm_mode_lock);
 293	if (likely(modemap[mode->encap] == mode)) {
 294		modemap[mode->encap] = NULL;
 295		module_put(mode->afinfo->owner);
 296		err = 0;
 297	}
 298
 299	spin_unlock_bh(&xfrm_mode_lock);
 300	xfrm_state_put_afinfo(afinfo);
 301	return err;
 302}
 303EXPORT_SYMBOL(xfrm_unregister_mode);
 304
 305static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 306{
 307	struct xfrm_state_afinfo *afinfo;
 308	struct xfrm_mode *mode;
 309	int modload_attempted = 0;
 310
 311	if (unlikely(encap >= XFRM_MODE_MAX))
 312		return NULL;
 313
 314retry:
 315	afinfo = xfrm_state_get_afinfo(family);
 316	if (unlikely(afinfo == NULL))
 317		return NULL;
 318
 319	mode = afinfo->mode_map[encap];
 320	if (unlikely(mode && !try_module_get(mode->owner)))
 321		mode = NULL;
 322	if (!mode && !modload_attempted) {
 323		xfrm_state_put_afinfo(afinfo);
 324		request_module("xfrm-mode-%d-%d", family, encap);
 325		modload_attempted = 1;
 326		goto retry;
 327	}
 328
 329	xfrm_state_put_afinfo(afinfo);
 330	return mode;
 331}
 332
 333static void xfrm_put_mode(struct xfrm_mode *mode)
 334{
 335	module_put(mode->owner);
 336}
 337
 338static void xfrm_state_gc_destroy(struct xfrm_state *x)
 339{
 340	tasklet_hrtimer_cancel(&x->mtimer);
 341	del_timer_sync(&x->rtimer);
 342	kfree(x->aalg);
 343	kfree(x->ealg);
 344	kfree(x->calg);
 345	kfree(x->encap);
 346	kfree(x->coaddr);
 347	kfree(x->replay_esn);
 348	kfree(x->preplay_esn);
 349	if (x->inner_mode)
 350		xfrm_put_mode(x->inner_mode);
 351	if (x->inner_mode_iaf)
 352		xfrm_put_mode(x->inner_mode_iaf);
 353	if (x->outer_mode)
 354		xfrm_put_mode(x->outer_mode);
 355	if (x->type) {
 356		x->type->destructor(x);
 357		xfrm_put_type(x->type);
 358	}
 359	security_xfrm_state_free(x);
 360	kfree(x);
 361}
 362
 363static void xfrm_state_gc_task(struct work_struct *work)
 364{
 365	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
 366	struct xfrm_state *x;
 367	struct hlist_node *tmp;
 368	struct hlist_head gc_list;
 369
 370	spin_lock_bh(&xfrm_state_gc_lock);
 371	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
 372	spin_unlock_bh(&xfrm_state_gc_lock);
 373
 374	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 375		xfrm_state_gc_destroy(x);
 376}
 377
 378static inline unsigned long make_jiffies(long secs)
 379{
 380	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
 381		return MAX_SCHEDULE_TIMEOUT-1;
 382	else
 383		return secs*HZ;
 384}
 385
/* Lifetime timer callback for a state (x->mtimer).
 *
 * Runs with x->lock held for its whole body.  It checks the hard and soft
 * add/use lifetimes against the current time in seconds and then either
 *  - deletes the state and reports a hard expiry (label "expired"),
 *  - reports a soft expiry via km_state_expired(x, 0, 0) and marks the state
 *    as dying, and/or
 *  - re-arms the timer for the nearest remaining deadline (label "resched").
 *
 * Always returns HRTIMER_NORESTART; any re-arming is done explicitly with
 * tasklet_hrtimer_start().
 */
static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
{
	struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
	struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
	unsigned long now = get_seconds();
	long next = LONG_MAX;	/* seconds until the nearest deadline; LONG_MAX = none */
	int warn = 0;		/* set when a soft limit has been reached */
	int err = 0;

	spin_lock(&x->lock);
	/* Already deleted: nothing left to do. */
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	/* Hard limit measured from the time the state was added. */
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			if (x->xflags & XFRM_SOFT_EXPIRE) {
				/* enter hard expire without soft expire first?!
				 * setting a new date could trigger this.
				 * workaround: fix x->curlft.add_time by below:
				 */
				x->curlft.add_time = now - x->saved_tmo - 1;
				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
			} else
				goto expired;
		}
		if (tmo < next)
			next = tmo;
	}
	/* Hard limit measured from first use; an unused state (use_time == 0)
	 * is treated as if it were first used right now.
	 */
	if (x->lft.hard_use_expires_seconds) {
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already signalled: skip the soft checks. */
	if (x->km.dying)
		goto resched;
	/* Soft limit measured from the time the state was added. */
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			x->xflags &= ~XFRM_SOFT_EXPIRE;
		} else if (tmo < next) {
			next = tmo;
			/* Remember that a soft expiry is pending and how far
			 * away it was; used by the hard-expiry workaround above.
			 */
			x->xflags |= XFRM_SOFT_EXPIRE;
			x->saved_tmo = tmo;
		}
	}
	/* Soft limit measured from first use. */
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX) {
		tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
	}

	goto out;

expired:
	/* A larval (ACQ, no SPI yet) state is marked EXPIRED before deletion. */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
		x->km.state = XFRM_STATE_EXPIRED;

	err = __xfrm_state_delete(x);
	if (!err)
		km_state_expired(x, 1, 0);

	xfrm_audit_state_delete(x, err ? 0 : 1,
				audit_get_loginuid(current),
				audit_get_sessionid(current), 0);

out:
	spin_unlock(&x->lock);
	return HRTIMER_NORESTART;
}
 474
 475static void xfrm_replay_timer_handler(unsigned long data);
 476
 477struct xfrm_state *xfrm_state_alloc(struct net *net)
 478{
 479	struct xfrm_state *x;
 480
 481	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
 482
 483	if (x) {
 484		write_pnet(&x->xs_net, net);
 485		atomic_set(&x->refcnt, 1);
 486		atomic_set(&x->tunnel_users, 0);
 487		INIT_LIST_HEAD(&x->km.all);
 488		INIT_HLIST_NODE(&x->bydst);
 489		INIT_HLIST_NODE(&x->bysrc);
 490		INIT_HLIST_NODE(&x->byspi);
 491		tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
 492					CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
 493		setup_timer(&x->rtimer, xfrm_replay_timer_handler,
 494				(unsigned long)x);
 495		x->curlft.add_time = get_seconds();
 496		x->lft.soft_byte_limit = XFRM_INF;
 497		x->lft.soft_packet_limit = XFRM_INF;
 498		x->lft.hard_byte_limit = XFRM_INF;
 499		x->lft.hard_packet_limit = XFRM_INF;
 500		x->replay_maxage = 0;
 501		x->replay_maxdiff = 0;
 502		x->inner_mode = NULL;
 503		x->inner_mode_iaf = NULL;
 504		spin_lock_init(&x->lock);
 505	}
 506	return x;
 507}
 508EXPORT_SYMBOL(xfrm_state_alloc);
 509
 510void __xfrm_state_destroy(struct xfrm_state *x)
 511{
 512	struct net *net = xs_net(x);
 513
 514	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 515
 516	spin_lock_bh(&xfrm_state_gc_lock);
 517	hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
 518	spin_unlock_bh(&xfrm_state_gc_lock);
 519	schedule_work(&net->xfrm.state_gc_work);
 520}
 521EXPORT_SYMBOL(__xfrm_state_destroy);
 522
 523int __xfrm_state_delete(struct xfrm_state *x)
 524{
 525	struct net *net = xs_net(x);
 526	int err = -ESRCH;
 527
 528	if (x->km.state != XFRM_STATE_DEAD) {
 529		x->km.state = XFRM_STATE_DEAD;
 530		spin_lock(&net->xfrm.xfrm_state_lock);
 531		list_del(&x->km.all);
 532		hlist_del(&x->bydst);
 533		hlist_del(&x->bysrc);
 534		if (x->id.spi)
 535			hlist_del(&x->byspi);
 536		net->xfrm.state_num--;
 537		spin_unlock(&net->xfrm.xfrm_state_lock);
 538
 539		/* All xfrm_state objects are created by xfrm_state_alloc.
 540		 * The xfrm_state_alloc call gives a reference, and that
 541		 * is what we are dropping here.
 542		 */
 543		xfrm_state_put(x);
 544		err = 0;
 545	}
 546
 547	return err;
 548}
 549EXPORT_SYMBOL(__xfrm_state_delete);
 550
 551int xfrm_state_delete(struct xfrm_state *x)
 552{
 553	int err;
 554
 555	spin_lock_bh(&x->lock);
 556	err = __xfrm_state_delete(x);
 557	spin_unlock_bh(&x->lock);
 558
 559	return err;
 560}
 561EXPORT_SYMBOL(xfrm_state_delete);
 562
 563#ifdef CONFIG_SECURITY_NETWORK_XFRM
 564static inline int
 565xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 566{
 567	int i, err = 0;
 568
 569	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 570		struct xfrm_state *x;
 571
 572		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 573			if (xfrm_id_proto_match(x->id.proto, proto) &&
 574			   (err = security_xfrm_state_delete(x)) != 0) {
 575				xfrm_audit_state_delete(x, 0,
 576							audit_info->loginuid,
 577							audit_info->sessionid,
 578							audit_info->secid);
 579				return err;
 580			}
 581		}
 582	}
 583
 584	return err;
 585}
 586#else
 587static inline int
 588xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 589{
 590	return 0;
 591}
 592#endif
 593
 594int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info)
 595{
 596	int i, err = 0, cnt = 0;
 597
 598	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 599	err = xfrm_state_flush_secctx_check(net, proto, audit_info);
 600	if (err)
 601		goto out;
 602
 603	err = -ESRCH;
 604	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 605		struct xfrm_state *x;
 606restart:
 607		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 608			if (!xfrm_state_kern(x) &&
 609			    xfrm_id_proto_match(x->id.proto, proto)) {
 610				xfrm_state_hold(x);
 611				spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 612
 613				err = xfrm_state_delete(x);
 614				xfrm_audit_state_delete(x, err ? 0 : 1,
 615							audit_info->loginuid,
 616							audit_info->sessionid,
 617							audit_info->secid);
 618				xfrm_state_put(x);
 619				if (!err)
 620					cnt++;
 621
 622				spin_lock_bh(&net->xfrm.xfrm_state_lock);
 623				goto restart;
 624			}
 625		}
 626	}
 627	if (cnt)
 628		err = 0;
 629
 630out:
 631	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 632	return err;
 633}
 634EXPORT_SYMBOL(xfrm_state_flush);
 635
 636void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 637{
 638	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 639	si->sadcnt = net->xfrm.state_num;
 640	si->sadhcnt = net->xfrm.state_hmask;
 641	si->sadhmcnt = xfrm_state_hashmax;
 642	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 643}
 644EXPORT_SYMBOL(xfrm_sad_getinfo);
 645
 646static int
 647xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 648		    const struct xfrm_tmpl *tmpl,
 649		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 650		    unsigned short family)
 651{
 652	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 653	if (!afinfo)
 654		return -1;
 655	afinfo->init_tempsel(&x->sel, fl);
 656
 657	if (family != tmpl->encap_family) {
 658		xfrm_state_put_afinfo(afinfo);
 659		afinfo = xfrm_state_get_afinfo(tmpl->encap_family);
 660		if (!afinfo)
 661			return -1;
 662	}
 663	afinfo->init_temprop(x, tmpl, daddr, saddr);
 664	xfrm_state_put_afinfo(afinfo);
 665	return 0;
 666}
 667
 668static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
 669					      const xfrm_address_t *daddr,
 670					      __be32 spi, u8 proto,
 671					      unsigned short family)
 672{
 673	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 674	struct xfrm_state *x;
 675
 676	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
 677		if (x->props.family != family ||
 678		    x->id.spi       != spi ||
 679		    x->id.proto     != proto ||
 680		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
 681			continue;
 682
 683		if ((mark & x->mark.m) != x->mark.v)
 684			continue;
 685		xfrm_state_hold(x);
 686		return x;
 687	}
 688
 689	return NULL;
 690}
 691
 692static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
 693						     const xfrm_address_t *daddr,
 694						     const xfrm_address_t *saddr,
 695						     u8 proto, unsigned short family)
 696{
 697	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 698	struct xfrm_state *x;
 699
 700	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
 701		if (x->props.family != family ||
 702		    x->id.proto     != proto ||
 703		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
 704		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
 705			continue;
 706
 707		if ((mark & x->mark.m) != x->mark.v)
 708			continue;
 709		xfrm_state_hold(x);
 710		return x;
 711	}
 712
 713	return NULL;
 714}
 715
 716static inline struct xfrm_state *
 717__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
 718{
 719	struct net *net = xs_net(x);
 720	u32 mark = x->mark.v & x->mark.m;
 721
 722	if (use_spi)
 723		return __xfrm_state_lookup(net, mark, &x->id.daddr,
 724					   x->id.spi, x->id.proto, family);
 725	else
 726		return __xfrm_state_lookup_byaddr(net, mark,
 727						  &x->id.daddr,
 728						  &x->props.saddr,
 729						  x->id.proto, family);
 730}
 731
 732static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
 733{
 734	if (have_hash_collision &&
 735	    (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
 736	    net->xfrm.state_num > net->xfrm.state_hmask)
 737		schedule_work(&net->xfrm.state_hash_work);
 738}
 739
 740static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 741			       const struct flowi *fl, unsigned short family,
 742			       struct xfrm_state **best, int *acq_in_progress,
 743			       int *error)
 744{
 745	/* Resolution logic:
 746	 * 1. There is a valid state with matching selector. Done.
 747	 * 2. Valid state with inappropriate selector. Skip.
 748	 *
 749	 * Entering area of "sysdeps".
 750	 *
 751	 * 3. If state is not valid, selector is temporary, it selects
 752	 *    only session which triggered previous resolution. Key
 753	 *    manager will do something to install a state with proper
 754	 *    selector.
 755	 */
 756	if (x->km.state == XFRM_STATE_VALID) {
 757		if ((x->sel.family &&
 758		     !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
 759		    !security_xfrm_state_pol_flow_match(x, pol, fl))
 760			return;
 761
 762		if (!*best ||
 763		    (*best)->km.dying > x->km.dying ||
 764		    ((*best)->km.dying == x->km.dying &&
 765		     (*best)->curlft.add_time < x->curlft.add_time))
 766			*best = x;
 767	} else if (x->km.state == XFRM_STATE_ACQ) {
 768		*acq_in_progress = 1;
 769	} else if (x->km.state == XFRM_STATE_ERROR ||
 770		   x->km.state == XFRM_STATE_EXPIRED) {
 771		if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
 772		    security_xfrm_state_pol_flow_match(x, pol, fl))
 773			*error = -ESRCH;
 774	}
 775}
 776
/* Find the state to use for a flow under policy @pol and template @tmpl,
 * starting an acquire through the key manager if none exists.
 *
 * The whole lookup runs under xfrm_state_lock.  Candidates are taken first
 * from the bydst bucket for (@daddr, @saddr) and, if that yields neither a
 * usable state nor an acquire in progress, from the bucket for (@daddr,
 * all-zero source address).  If nothing is found and no error was recorded,
 * a temporary state is allocated, handed to km_query() and, on success,
 * inserted in XFRM_STATE_ACQ with an acquire-expiry timer.
 *
 * Returns a state with an extra reference (this may be the newly inserted
 * ACQ state), or NULL with *@err set: -EAGAIN when an acquire is already in
 * progress, otherwise the recorded error (-EEXIST, -ESRCH, -ENOMEM or the
 * security hook's error).  *@err is not written when a state is returned.
 */
struct xfrm_state *
xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
		const struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	static xfrm_address_t saddr_wildcard = { };
	struct net *net = xp_net(pol);
	unsigned int h, h_wildcard;
	struct xfrm_state *x, *x0, *to_put;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;
	u32 mark = pol->mark.v & pol->mark.m;
	unsigned short encap_family = tmpl->encap_family;
	struct km_event c;

	/* State whose reference is dropped only after the lock is released. */
	to_put = NULL;

	spin_lock_bh(&net->xfrm.xfrm_state_lock);
	/* Pass 1: states hashed under the exact (daddr, saddr) pair. */
	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, encap_family,
					   &best, &acquire_in_progress, &error);
	}
	if (best || acquire_in_progress)
		goto found;

	/* Pass 2: states hashed under an all-zero source address. */
	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
		if (x->props.family == encap_family &&
		    x->props.reqid == tmpl->reqid &&
		    (mark & x->mark.m) == x->mark.v &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
			xfrm_state_look_at(pol, x, fl, encap_family,
					   &best, &acquire_in_progress, &error);
	}

found:
	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* The template names an SPI that some other state already
		 * uses: refuse rather than start an acquire for it.
		 */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
					      tmpl->id.proto, encap_family)) != NULL) {
			to_put = x0;
			error = -EEXIST;
			goto out;
		}

		c.net = net;
		/* If the KMs have no listeners (yet...), avoid allocating an SA
		 * for each and every packet - garbage collection might not
		 * handle the flood.
		 */
		if (!km_is_alive(&c)) {
			error = -ESRCH;
			goto out;
		}

		x = xfrm_state_alloc(net);
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary state matching only
		 * to current session. */
		/* NOTE(review): the return value is ignored, so a failed
		 * afinfo lookup in xfrm_init_tempstate() goes unnoticed here
		 * -- confirm this is intended.
		 */
		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
		memcpy(&x->mark, &pol->mark, sizeof(x->mark));

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			goto out;
		}

		if (km_query(x, tmpl, pol) == 0) {
			/* Acquire accepted: insert the temporary state and
			 * arm its timer with the acquire expiry time.
			 */
			x->km.state = XFRM_STATE_ACQ;
			list_add(&x->km.all, &net->xfrm.state_all);
			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
			h = xfrm_src_hash(net, daddr, saddr, encap_family);
			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
			net->xfrm.state_num++;
			xfrm_hash_grow_check(net, x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			to_put = x;
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	/* The returned state carries its own reference for the caller. */
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
	if (to_put)
		xfrm_state_put(to_put);
	return x;
}
 897
 898struct xfrm_state *
 899xfrm_stateonly_find(struct net *net, u32 mark,
 900		    xfrm_address_t *daddr, xfrm_address_t *saddr,
 901		    unsigned short family, u8 mode, u8 proto, u32 reqid)
 902{
 903	unsigned int h;
 904	struct xfrm_state *rx = NULL, *x = NULL;
 905
 906	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 907	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
 908	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 909		if (x->props.family == family &&
 910		    x->props.reqid == reqid &&
 911		    (mark & x->mark.m) == x->mark.v &&
 912		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
 913		    xfrm_state_addr_check(x, daddr, saddr, family) &&
 914		    mode == x->props.mode &&
 915		    proto == x->id.proto &&
 916		    x->km.state == XFRM_STATE_VALID) {
 917			rx = x;
 918			break;
 919		}
 920	}
 921
 922	if (rx)
 923		xfrm_state_hold(rx);
 924	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 925
 926
 927	return rx;
 928}
 929EXPORT_SYMBOL(xfrm_stateonly_find);
 930
 931struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
 932					      unsigned short family)
 933{
 934	struct xfrm_state *x;
 935	struct xfrm_state_walk *w;
 936
 937	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 938	list_for_each_entry(w, &net->xfrm.state_all, all) {
 939		x = container_of(w, struct xfrm_state, km);
 940		if (x->props.family != family ||
 941			x->id.spi != spi)
 942			continue;
 943
 944		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 945		xfrm_state_hold(x);
 946		return x;
 947	}
 948	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 949	return NULL;
 950}
 951EXPORT_SYMBOL(xfrm_state_lookup_byspi);
 952
 953static void __xfrm_state_insert(struct xfrm_state *x)
 954{
 955	struct net *net = xs_net(x);
 956	unsigned int h;
 957
 958	list_add(&x->km.all, &net->xfrm.state_all);
 959
 960	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
 961			  x->props.reqid, x->props.family);
 962	hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
 963
 964	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
 965	hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
 966
 967	if (x->id.spi) {
 968		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
 969				  x->props.family);
 970
 971		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
 972	}
 973
 974	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
 975	if (x->replay_maxage)
 976		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
 977
 978	net->xfrm.state_num++;
 979
 980	xfrm_hash_grow_check(net, x->bydst.next != NULL);
 981}
 982
 983/* net->xfrm.xfrm_state_lock is held */
 984static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 985{
 986	struct net *net = xs_net(xnew);
 987	unsigned short family = xnew->props.family;
 988	u32 reqid = xnew->props.reqid;
 989	struct xfrm_state *x;
 990	unsigned int h;
 991	u32 mark = xnew->mark.v & xnew->mark.m;
 992
 993	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
 994	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
 995		if (x->props.family	== family &&
 996		    x->props.reqid	== reqid &&
 997		    (mark & x->mark.m) == x->mark.v &&
 998		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
 999		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1000			x->genid++;
1001	}
1002}
1003
1004void xfrm_state_insert(struct xfrm_state *x)
1005{
1006	struct net *net = xs_net(x);
1007
1008	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1009	__xfrm_state_bump_genids(x);
1010	__xfrm_state_insert(x);
1011	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1012}
1013EXPORT_SYMBOL(xfrm_state_insert);
1014
1015/* net->xfrm.xfrm_state_lock is held */
1016static struct xfrm_state *__find_acq_core(struct net *net,
1017					  const struct xfrm_mark *m,
1018					  unsigned short family, u8 mode,
1019					  u32 reqid, u8 proto,
1020					  const xfrm_address_t *daddr,
1021					  const xfrm_address_t *saddr,
1022					  int create)
1023{
1024	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1025	struct xfrm_state *x;
1026	u32 mark = m->v & m->m;
1027
1028	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1029		if (x->props.reqid  != reqid ||
1030		    x->props.mode   != mode ||
1031		    x->props.family != family ||
1032		    x->km.state     != XFRM_STATE_ACQ ||
1033		    x->id.spi       != 0 ||
1034		    x->id.proto	    != proto ||
1035		    (mark & x->mark.m) != x->mark.v ||
1036		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1037		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
1038			continue;
1039
1040		xfrm_state_hold(x);
1041		return x;
1042	}
1043
1044	if (!create)
1045		return NULL;
1046
1047	x = xfrm_state_alloc(net);
1048	if (likely(x)) {
1049		switch (family) {
1050		case AF_INET:
1051			x->sel.daddr.a4 = daddr->a4;
1052			x->sel.saddr.a4 = saddr->a4;
1053			x->sel.prefixlen_d = 32;
1054			x->sel.prefixlen_s = 32;
1055			x->props.saddr.a4 = saddr->a4;
1056			x->id.daddr.a4 = daddr->a4;
1057			break;
1058
1059		case AF_INET6:
1060			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
1061			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
1062			x->sel.prefixlen_d = 128;
1063			x->sel.prefixlen_s = 128;
1064			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
1065			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
1066			break;
1067		}
1068
1069		x->km.state = XFRM_STATE_ACQ;
1070		x->id.proto = proto;
1071		x->props.family = family;
1072		x->props.mode = mode;
1073		x->props.reqid = reqid;
1074		x->mark.v = m->v;
1075		x->mark.m = m->m;
1076		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1077		xfrm_state_hold(x);
1078		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1079		list_add(&x->km.all, &net->xfrm.state_all);
1080		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
1081		h = xfrm_src_hash(net, daddr, saddr, family);
1082		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
1083
1084		net->xfrm.state_num++;
1085
1086		xfrm_hash_grow_check(net, x->bydst.next != NULL);
1087	}
1088
1089	return x;
1090}
1091
1092static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1093
1094int xfrm_state_add(struct xfrm_state *x)
1095{
1096	struct net *net = xs_net(x);
1097	struct xfrm_state *x1, *to_put;
1098	int family;
1099	int err;
1100	u32 mark = x->mark.v & x->mark.m;
1101	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1102
1103	family = x->props.family;
1104
1105	to_put = NULL;
1106
1107	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1108
1109	x1 = __xfrm_state_locate(x, use_spi, family);
1110	if (x1) {
1111		to_put = x1;
1112		x1 = NULL;
1113		err = -EEXIST;
1114		goto out;
1115	}
1116
1117	if (use_spi && x->km.seq) {
1118		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1119		if (x1 && ((x1->id.proto != x->id.proto) ||
1120		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1121			to_put = x1;
1122			x1 = NULL;
1123		}
1124	}
1125
1126	if (use_spi && !x1)
1127		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1128				     x->props.reqid, x->id.proto,
1129				     &x->id.daddr, &x->props.saddr, 0);
1130
1131	__xfrm_state_bump_genids(x);
1132	__xfrm_state_insert(x);
1133	err = 0;
1134
1135out:
1136	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1137
1138	if (x1) {
1139		xfrm_state_delete(x1);
1140		xfrm_state_put(x1);
1141	}
1142
1143	if (to_put)
1144		xfrm_state_put(to_put);
1145
1146	return err;
1147}
1148EXPORT_SYMBOL(xfrm_state_add);
1149
1150#ifdef CONFIG_XFRM_MIGRATE
1151static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
1152{
1153	struct net *net = xs_net(orig);
1154	struct xfrm_state *x = xfrm_state_alloc(net);
1155	if (!x)
1156		goto out;
1157
1158	memcpy(&x->id, &orig->id, sizeof(x->id));
1159	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1160	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1161	x->props.mode = orig->props.mode;
1162	x->props.replay_window = orig->props.replay_window;
1163	x->props.reqid = orig->props.reqid;
1164	x->props.family = orig->props.family;
1165	x->props.saddr = orig->props.saddr;
1166
1167	if (orig->aalg) {
1168		x->aalg = xfrm_algo_auth_clone(orig->aalg);
1169		if (!x->aalg)
1170			goto error;
1171	}
1172	x->props.aalgo = orig->props.aalgo;
1173
1174	if (orig->aead) {
1175		x->aead = xfrm_algo_aead_clone(orig->aead);
1176		if (!x->aead)
1177			goto error;
1178	}
1179	if (orig->ealg) {
1180		x->ealg = xfrm_algo_clone(orig->ealg);
1181		if (!x->ealg)
1182			goto error;
1183	}
1184	x->props.ealgo = orig->props.ealgo;
1185
1186	if (orig->calg) {
1187		x->calg = xfrm_algo_clone(orig->calg);
1188		if (!x->calg)
1189			goto error;
1190	}
1191	x->props.calgo = orig->props.calgo;
1192
1193	if (orig->encap) {
1194		x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
1195		if (!x->encap)
1196			goto error;
1197	}
1198
1199	if (orig->coaddr) {
1200		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1201				    GFP_KERNEL);
1202		if (!x->coaddr)
1203			goto error;
1204	}
1205
1206	if (orig->replay_esn) {
1207		if (xfrm_replay_clone(x, orig))
1208			goto error;
1209	}
1210
1211	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1212
1213	if (xfrm_init_state(x) < 0)
1214		goto error;
1215
1216	x->props.flags = orig->props.flags;
1217	x->props.extra_flags = orig->props.extra_flags;
1218
1219	x->tfcpad = orig->tfcpad;
1220	x->replay_maxdiff = orig->replay_maxdiff;
1221	x->replay_maxage = orig->replay_maxage;
1222	x->curlft.add_time = orig->curlft.add_time;
1223	x->km.state = orig->km.state;
1224	x->km.seq = orig->km.seq;
1225
1226	return x;
1227
1228 error:
1229	xfrm_state_put(x);
1230out:
1231	return NULL;
1232}
1233
1234struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1235{
1236	unsigned int h;
1237	struct xfrm_state *x = NULL;
1238
1239	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1240
1241	if (m->reqid) {
1242		h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1243				  m->reqid, m->old_family);
1244		hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1245			if (x->props.mode != m->mode ||
1246			    x->id.proto != m->proto)
1247				continue;
1248			if (m->reqid && x->props.reqid != m->reqid)
1249				continue;
1250			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1251					     m->old_family) ||
1252			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1253					     m->old_family))
1254				continue;
1255			xfrm_state_hold(x);
1256			break;
1257		}
1258	} else {
1259		h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1260				  m->old_family);
1261		hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1262			if (x->props.mode != m->mode ||
1263			    x->id.proto != m->proto)
1264				continue;
1265			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1266					     m->old_family) ||
1267			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1268					     m->old_family))
1269				continue;
1270			xfrm_state_hold(x);
1271			break;
1272		}
1273	}
1274
1275	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1276
1277	return x;
1278}
1279EXPORT_SYMBOL(xfrm_migrate_state_find);
1280
1281struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1282				      struct xfrm_migrate *m)
1283{
1284	struct xfrm_state *xc;
1285
1286	xc = xfrm_state_clone(x);
1287	if (!xc)
1288		return NULL;
1289
1290	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1291	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1292
1293	/* add state */
1294	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1295		/* a care is needed when the destination address of the
1296		   state is to be updated as it is a part of triplet */
1297		xfrm_state_insert(xc);
1298	} else {
1299		if (xfrm_state_add(xc) < 0)
1300			goto error;
1301	}
1302
1303	return xc;
1304error:
1305	xfrm_state_put(xc);
1306	return NULL;
1307}
1308EXPORT_SYMBOL(xfrm_state_migrate);
1309#endif
1310
1311int xfrm_state_update(struct xfrm_state *x)
1312{
1313	struct xfrm_state *x1, *to_put;
1314	int err;
1315	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1316	struct net *net = xs_net(x);
1317
1318	to_put = NULL;
1319
1320	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1321	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1322
1323	err = -ESRCH;
1324	if (!x1)
1325		goto out;
1326
1327	if (xfrm_state_kern(x1)) {
1328		to_put = x1;
1329		err = -EEXIST;
1330		goto out;
1331	}
1332
1333	if (x1->km.state == XFRM_STATE_ACQ) {
1334		__xfrm_state_insert(x);
1335		x = NULL;
1336	}
1337	err = 0;
1338
1339out:
1340	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1341
1342	if (to_put)
1343		xfrm_state_put(to_put);
1344
1345	if (err)
1346		return err;
1347
1348	if (!x) {
1349		xfrm_state_delete(x1);
1350		xfrm_state_put(x1);
1351		return 0;
1352	}
1353
1354	err = -EINVAL;
1355	spin_lock_bh(&x1->lock);
1356	if (likely(x1->km.state == XFRM_STATE_VALID)) {
1357		if (x->encap && x1->encap)
1358			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1359		if (x->coaddr && x1->coaddr) {
1360			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1361		}
1362		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1363			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1364		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1365		x1->km.dying = 0;
1366
1367		tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1368		if (x1->curlft.use_time)
1369			xfrm_state_check_expire(x1);
1370
1371		err = 0;
1372		x->km.state = XFRM_STATE_DEAD;
1373		__xfrm_state_put(x);
1374	}
1375	spin_unlock_bh(&x1->lock);
1376
1377	xfrm_state_put(x1);
1378
1379	return err;
1380}
1381EXPORT_SYMBOL(xfrm_state_update);
1382
1383int xfrm_state_check_expire(struct xfrm_state *x)
1384{
1385	if (!x->curlft.use_time)
1386		x->curlft.use_time = get_seconds();
1387
1388	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1389	    x->curlft.packets >= x->lft.hard_packet_limit) {
1390		x->km.state = XFRM_STATE_EXPIRED;
1391		tasklet_hrtimer_start(&x->mtimer, ktime_set(0, 0), HRTIMER_MODE_REL);
1392		return -EINVAL;
1393	}
1394
1395	if (!x->km.dying &&
1396	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
1397	     x->curlft.packets >= x->lft.soft_packet_limit)) {
1398		x->km.dying = 1;
1399		km_state_expired(x, 0, 0);
1400	}
1401	return 0;
1402}
1403EXPORT_SYMBOL(xfrm_state_check_expire);
1404
1405struct xfrm_state *
1406xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1407		  u8 proto, unsigned short family)
1408{
1409	struct xfrm_state *x;
1410
1411	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1412	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1413	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1414	return x;
1415}
1416EXPORT_SYMBOL(xfrm_state_lookup);
1417
1418struct xfrm_state *
1419xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1420			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1421			 u8 proto, unsigned short family)
1422{
1423	struct xfrm_state *x;
1424
1425	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1426	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1427	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1428	return x;
1429}
1430EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1431
1432struct xfrm_state *
1433xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1434	      u8 proto, const xfrm_address_t *daddr,
1435	      const xfrm_address_t *saddr, int create, unsigned short family)
1436{
1437	struct xfrm_state *x;
1438
1439	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1440	x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
1441	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1442
1443	return x;
1444}
1445EXPORT_SYMBOL(xfrm_find_acq);
1446
1447#ifdef CONFIG_XFRM_SUB_POLICY
1448int
1449xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1450	       unsigned short family, struct net *net)
1451{
1452	int err = 0;
1453	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1454	if (!afinfo)
1455		return -EAFNOSUPPORT;
1456
1457	spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1458	if (afinfo->tmpl_sort)
1459		err = afinfo->tmpl_sort(dst, src, n);
1460	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1461	xfrm_state_put_afinfo(afinfo);
1462	return err;
1463}
1464EXPORT_SYMBOL(xfrm_tmpl_sort);
1465
1466int
1467xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1468		unsigned short family)
1469{
1470	int err = 0;
1471	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1472	struct net *net = xs_net(*src);
1473
1474	if (!afinfo)
1475		return -EAFNOSUPPORT;
1476
1477	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1478	if (afinfo->state_sort)
1479		err = afinfo->state_sort(dst, src, n);
1480	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1481	xfrm_state_put_afinfo(afinfo);
1482	return err;
1483}
1484EXPORT_SYMBOL(xfrm_state_sort);
1485#endif
1486
1487/* Silly enough, but I'm lazy to build resolution list */
1488
1489static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1490{
1491	int i;
1492
1493	for (i = 0; i <= net->xfrm.state_hmask; i++) {
1494		struct xfrm_state *x;
1495
1496		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1497			if (x->km.seq == seq &&
1498			    (mark & x->mark.m) == x->mark.v &&
1499			    x->km.state == XFRM_STATE_ACQ) {
1500				xfrm_state_hold(x);
1501				return x;
1502			}
1503		}
1504	}
1505	return NULL;
1506}
1507
1508struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1509{
1510	struct xfrm_state *x;
1511
1512	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1513	x = __xfrm_find_acq_byseq(net, mark, seq);
1514	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1515	return x;
1516}
1517EXPORT_SYMBOL(xfrm_find_acq_byseq);
1518
1519u32 xfrm_get_acqseq(void)
1520{
1521	u32 res;
1522	static atomic_t acqseq;
1523
1524	do {
1525		res = atomic_inc_return(&acqseq);
1526	} while (!res);
1527
1528	return res;
1529}
1530EXPORT_SYMBOL(xfrm_get_acqseq);
1531
1532int verify_spi_info(u8 proto, u32 min, u32 max)
1533{
1534	switch (proto) {
1535	case IPPROTO_AH:
1536	case IPPROTO_ESP:
1537		break;
1538
1539	case IPPROTO_COMP:
1540		/* IPCOMP spi is 16-bits. */
1541		if (max >= 0x10000)
1542			return -EINVAL;
1543		break;
1544
1545	default:
1546		return -EINVAL;
1547	}
1548
1549	if (min > max)
1550		return -EINVAL;
1551
1552	return 0;
1553}
1554EXPORT_SYMBOL(verify_spi_info);
1555
1556int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
1557{
1558	struct net *net = xs_net(x);
1559	unsigned int h;
1560	struct xfrm_state *x0;
1561	int err = -ENOENT;
1562	__be32 minspi = htonl(low);
1563	__be32 maxspi = htonl(high);
1564	u32 mark = x->mark.v & x->mark.m;
1565
1566	spin_lock_bh(&x->lock);
1567	if (x->km.state == XFRM_STATE_DEAD)
1568		goto unlock;
1569
1570	err = 0;
1571	if (x->id.spi)
1572		goto unlock;
1573
1574	err = -ENOENT;
1575
1576	if (minspi == maxspi) {
1577		x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
1578		if (x0) {
1579			xfrm_state_put(x0);
1580			goto unlock;
1581		}
1582		x->id.spi = minspi;
1583	} else {
1584		u32 spi = 0;
1585		for (h = 0; h < high-low+1; h++) {
1586			spi = low + prandom_u32()%(high-low+1);
1587			x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1588			if (x0 == NULL) {
1589				x->id.spi = htonl(spi);
1590				break;
1591			}
1592			xfrm_state_put(x0);
1593		}
1594	}
1595	if (x->id.spi) {
1596		spin_lock_bh(&net->xfrm.xfrm_state_lock);
1597		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1598		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
1599		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1600
1601		err = 0;
1602	}
1603
1604unlock:
1605	spin_unlock_bh(&x->lock);
1606
1607	return err;
1608}
1609EXPORT_SYMBOL(xfrm_alloc_spi);
1610
1611static bool __xfrm_state_filter_match(struct xfrm_state *x,
1612				      struct xfrm_address_filter *filter)
1613{
1614	if (filter) {
1615		if ((filter->family == AF_INET ||
1616		     filter->family == AF_INET6) &&
1617		    x->props.family != filter->family)
1618			return false;
1619
1620		return addr_match(&x->props.saddr, &filter->saddr,
1621				  filter->splen) &&
1622		       addr_match(&x->id.daddr, &filter->daddr,
1623				  filter->dplen);
1624	}
1625	return true;
1626}
1627
1628int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
1629		    int (*func)(struct xfrm_state *, int, void*),
1630		    void *data)
1631{
1632	struct xfrm_state *state;
1633	struct xfrm_state_walk *x;
1634	int err = 0;
1635
1636	if (walk->seq != 0 && list_empty(&walk->all))
1637		return 0;
1638
1639	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1640	if (list_empty(&walk->all))
1641		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
1642	else
1643		x = list_entry(&walk->all, struct xfrm_state_walk, all);
1644	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
1645		if (x->state == XFRM_STATE_DEAD)
1646			continue;
1647		state = container_of(x, struct xfrm_state, km);
1648		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
1649			continue;
1650		if (!__xfrm_state_filter_match(state, walk->filter))
1651			continue;
1652		err = func(state, walk->seq, data);
1653		if (err) {
1654			list_move_tail(&walk->all, &x->all);
1655			goto out;
1656		}
1657		walk->seq++;
1658	}
1659	if (walk->seq == 0) {
1660		err = -ENOENT;
1661		goto out;
1662	}
1663	list_del_init(&walk->all);
1664out:
1665	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1666	return err;
1667}
1668EXPORT_SYMBOL(xfrm_state_walk);
1669
1670void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1671			  struct xfrm_address_filter *filter)
1672{
1673	INIT_LIST_HEAD(&walk->all);
1674	walk->proto = proto;
1675	walk->state = XFRM_STATE_DEAD;
1676	walk->seq = 0;
1677	walk->filter = filter;
1678}
1679EXPORT_SYMBOL(xfrm_state_walk_init);
1680
1681void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1682{
1683	kfree(walk->filter);
1684
1685	if (list_empty(&walk->all))
1686		return;
1687
1688	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1689	list_del(&walk->all);
1690	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1691}
1692EXPORT_SYMBOL(xfrm_state_walk_done);
1693
1694static void xfrm_replay_timer_handler(unsigned long data)
1695{
1696	struct xfrm_state *x = (struct xfrm_state *)data;
1697
1698	spin_lock(&x->lock);
1699
1700	if (x->km.state == XFRM_STATE_VALID) {
1701		if (xfrm_aevent_is_on(xs_net(x)))
1702			x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1703		else
1704			x->xflags |= XFRM_TIME_DEFER;
1705	}
1706
1707	spin_unlock(&x->lock);
1708}
1709
1710static LIST_HEAD(xfrm_km_list);
1711
1712void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1713{
1714	struct xfrm_mgr *km;
1715
1716	rcu_read_lock();
1717	list_for_each_entry_rcu(km, &xfrm_km_list, list)
1718		if (km->notify_policy)
1719			km->notify_policy(xp, dir, c);
1720	rcu_read_unlock();
1721}
1722
1723void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1724{
1725	struct xfrm_mgr *km;
1726	rcu_read_lock();
1727	list_for_each_entry_rcu(km, &xfrm_km_list, list)
1728		if (km->notify)
1729			km->notify(x, c);
1730	rcu_read_unlock();
1731}
1732
1733EXPORT_SYMBOL(km_policy_notify);
1734EXPORT_SYMBOL(km_state_notify);
1735
1736void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1737{
1738	struct km_event c;
1739
1740	c.data.hard = hard;
1741	c.portid = portid;
1742	c.event = XFRM_MSG_EXPIRE;
1743	km_state_notify(x, &c);
1744}
1745
1746EXPORT_SYMBOL(km_state_expired);
1747/*
1748 * We send to all registered managers regardless of failure
1749 * We are happy with one success
1750*/
1751int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1752{
1753	int err = -EINVAL, acqret;
1754	struct xfrm_mgr *km;
1755
1756	rcu_read_lock();
1757	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1758		acqret = km->acquire(x, t, pol);
1759		if (!acqret)
1760			err = acqret;
1761	}
1762	rcu_read_unlock();
1763	return err;
1764}
1765EXPORT_SYMBOL(km_query);
1766
1767int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1768{
1769	int err = -EINVAL;
1770	struct xfrm_mgr *km;
1771
1772	rcu_read_lock();
1773	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1774		if (km->new_mapping)
1775			err = km->new_mapping(x, ipaddr, sport);
1776		if (!err)
1777			break;
1778	}
1779	rcu_read_unlock();
1780	return err;
1781}
1782EXPORT_SYMBOL(km_new_mapping);
1783
1784void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
1785{
1786	struct km_event c;
1787
1788	c.data.hard = hard;
1789	c.portid = portid;
1790	c.event = XFRM_MSG_POLEXPIRE;
1791	km_policy_notify(pol, dir, &c);
1792}
1793EXPORT_SYMBOL(km_policy_expired);
1794
1795#ifdef CONFIG_XFRM_MIGRATE
1796int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
1797	       const struct xfrm_migrate *m, int num_migrate,
1798	       const struct xfrm_kmaddress *k)
1799{
1800	int err = -EINVAL;
1801	int ret;
1802	struct xfrm_mgr *km;
1803
1804	rcu_read_lock();
1805	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1806		if (km->migrate) {
1807			ret = km->migrate(sel, dir, type, m, num_migrate, k);
1808			if (!ret)
1809				err = ret;
1810		}
1811	}
1812	rcu_read_unlock();
1813	return err;
1814}
1815EXPORT_SYMBOL(km_migrate);
1816#endif
1817
1818int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1819{
1820	int err = -EINVAL;
1821	int ret;
1822	struct xfrm_mgr *km;
1823
1824	rcu_read_lock();
1825	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1826		if (km->report) {
1827			ret = km->report(net, proto, sel, addr);
1828			if (!ret)
1829				err = ret;
1830		}
1831	}
1832	rcu_read_unlock();
1833	return err;
1834}
1835EXPORT_SYMBOL(km_report);
1836
1837bool km_is_alive(const struct km_event *c)
1838{
1839	struct xfrm_mgr *km;
1840	bool is_alive = false;
1841
1842	rcu_read_lock();
1843	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1844		if (km->is_alive && km->is_alive(c)) {
1845			is_alive = true;
1846			break;
1847		}
1848	}
1849	rcu_read_unlock();
1850
1851	return is_alive;
1852}
1853EXPORT_SYMBOL(km_is_alive);
1854
1855int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1856{
1857	int err;
1858	u8 *data;
1859	struct xfrm_mgr *km;
1860	struct xfrm_policy *pol = NULL;
1861
1862	if (optlen <= 0 || optlen > PAGE_SIZE)
1863		return -EMSGSIZE;
1864
1865	data = kmalloc(optlen, GFP_KERNEL);
1866	if (!data)
1867		return -ENOMEM;
1868
1869	err = -EFAULT;
1870	if (copy_from_user(data, optval, optlen))
1871		goto out;
1872
1873	err = -EINVAL;
1874	rcu_read_lock();
1875	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1876		pol = km->compile_policy(sk, optname, data,
1877					 optlen, &err);
1878		if (err >= 0)
1879			break;
1880	}
1881	rcu_read_unlock();
1882
1883	if (err >= 0) {
1884		xfrm_sk_policy_insert(sk, err, pol);
1885		xfrm_pol_put(pol);
1886		err = 0;
1887	}
1888
1889out:
1890	kfree(data);
1891	return err;
1892}
1893EXPORT_SYMBOL(xfrm_user_policy);
1894
1895static DEFINE_SPINLOCK(xfrm_km_lock);
1896
1897int xfrm_register_km(struct xfrm_mgr *km)
1898{
1899	spin_lock_bh(&xfrm_km_lock);
1900	list_add_tail_rcu(&km->list, &xfrm_km_list);
1901	spin_unlock_bh(&xfrm_km_lock);
1902	return 0;
1903}
1904EXPORT_SYMBOL(xfrm_register_km);
1905
1906int xfrm_unregister_km(struct xfrm_mgr *km)
1907{
1908	spin_lock_bh(&xfrm_km_lock);
1909	list_del_rcu(&km->list);
1910	spin_unlock_bh(&xfrm_km_lock);
1911	synchronize_rcu();
1912	return 0;
1913}
1914EXPORT_SYMBOL(xfrm_unregister_km);
1915
1916int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1917{
1918	int err = 0;
1919	if (unlikely(afinfo == NULL))
1920		return -EINVAL;
1921	if (unlikely(afinfo->family >= NPROTO))
1922		return -EAFNOSUPPORT;
1923	spin_lock_bh(&xfrm_state_afinfo_lock);
1924	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1925		err = -ENOBUFS;
1926	else
1927		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
1928	spin_unlock_bh(&xfrm_state_afinfo_lock);
1929	return err;
1930}
1931EXPORT_SYMBOL(xfrm_state_register_afinfo);
1932
1933int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1934{
1935	int err = 0;
1936	if (unlikely(afinfo == NULL))
1937		return -EINVAL;
1938	if (unlikely(afinfo->family >= NPROTO))
1939		return -EAFNOSUPPORT;
1940	spin_lock_bh(&xfrm_state_afinfo_lock);
1941	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1942		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1943			err = -EINVAL;
1944		else
1945			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
1946	}
1947	spin_unlock_bh(&xfrm_state_afinfo_lock);
1948	synchronize_rcu();
1949	return err;
1950}
1951EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1952
1953struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
1954{
1955	struct xfrm_state_afinfo *afinfo;
1956	if (unlikely(family >= NPROTO))
1957		return NULL;
1958	rcu_read_lock();
1959	afinfo = rcu_dereference(xfrm_state_afinfo[family]);
1960	if (unlikely(!afinfo))
1961		rcu_read_unlock();
1962	return afinfo;
1963}
1964
/*
 * Counterpart of a successful xfrm_state_get_afinfo(): drops the RCU read
 * lock that lookup left held.  @afinfo itself is not touched.
 */
void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	(void)afinfo;

	rcu_read_unlock();
}
1969
1970/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1971void xfrm_state_delete_tunnel(struct xfrm_state *x)
1972{
1973	if (x->tunnel) {
1974		struct xfrm_state *t = x->tunnel;
1975
1976		if (atomic_read(&t->tunnel_users) == 2)
1977			xfrm_state_delete(t);
1978		atomic_dec(&t->tunnel_users);
1979		xfrm_state_put(t);
1980		x->tunnel = NULL;
1981	}
1982}
1983EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1984
1985int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1986{
1987	int res;
1988
1989	spin_lock_bh(&x->lock);
1990	if (x->km.state == XFRM_STATE_VALID &&
1991	    x->type && x->type->get_mtu)
1992		res = x->type->get_mtu(x, mtu);
1993	else
1994		res = mtu - x->props.header_len;
1995	spin_unlock_bh(&x->lock);
1996	return res;
1997}
1998
1999int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
2000{
2001	struct xfrm_state_afinfo *afinfo;
2002	struct xfrm_mode *inner_mode;
2003	int family = x->props.family;
2004	int err;
2005
2006	err = -EAFNOSUPPORT;
2007	afinfo = xfrm_state_get_afinfo(family);
2008	if (!afinfo)
2009		goto error;
2010
2011	err = 0;
2012	if (afinfo->init_flags)
2013		err = afinfo->init_flags(x);
2014
2015	xfrm_state_put_afinfo(afinfo);
2016
2017	if (err)
2018		goto error;
2019
2020	err = -EPROTONOSUPPORT;
2021
2022	if (x->sel.family != AF_UNSPEC) {
2023		inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2024		if (inner_mode == NULL)
2025			goto error;
2026
2027		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2028		    family != x->sel.family) {
2029			xfrm_put_mode(inner_mode);
2030			goto error;
2031		}
2032
2033		x->inner_mode = inner_mode;
2034	} else {
2035		struct xfrm_mode *inner_mode_iaf;
2036		int iafamily = AF_INET;
2037
2038		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2039		if (inner_mode == NULL)
2040			goto error;
2041
2042		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2043			xfrm_put_mode(inner_mode);
2044			goto error;
2045		}
2046		x->inner_mode = inner_mode;
2047
2048		if (x->props.family == AF_INET)
2049			iafamily = AF_INET6;
2050
2051		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2052		if (inner_mode_iaf) {
2053			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2054				x->inner_mode_iaf = inner_mode_iaf;
2055			else
2056				xfrm_put_mode(inner_mode_iaf);
2057		}
2058	}
2059
2060	x->type = xfrm_get_type(x->id.proto, family);
2061	if (x->type == NULL)
2062		goto error;
2063
2064	err = x->type->init_state(x);
2065	if (err)
2066		goto error;
2067
2068	x->outer_mode = xfrm_get_mode(x->props.mode, family);
2069	if (x->outer_mode == NULL) {
2070		err = -EPROTONOSUPPORT;
2071		goto error;
2072	}
2073
2074	if (init_replay) {
2075		err = xfrm_init_replay(x);
2076		if (err)
2077			goto error;
2078	}
2079
2080	x->km.state = XFRM_STATE_VALID;
2081
2082error:
2083	return err;
2084}
2085
2086EXPORT_SYMBOL(__xfrm_init_state);
2087
2088int xfrm_init_state(struct xfrm_state *x)
2089{
2090	return __xfrm_init_state(x, true);
2091}
2092
2093EXPORT_SYMBOL(xfrm_init_state);
2094
2095int __net_init xfrm_state_init(struct net *net)
2096{
2097	unsigned int sz;
2098
2099	INIT_LIST_HEAD(&net->xfrm.state_all);
2100
2101	sz = sizeof(struct hlist_head) * 8;
2102
2103	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2104	if (!net->xfrm.state_bydst)
2105		goto out_bydst;
2106	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2107	if (!net->xfrm.state_bysrc)
2108		goto out_bysrc;
2109	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2110	if (!net->xfrm.state_byspi)
2111		goto out_byspi;
2112	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2113
2114	net->xfrm.state_num = 0;
2115	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2116	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
2117	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
2118	spin_lock_init(&net->xfrm.xfrm_state_lock);
2119	return 0;
2120
2121out_byspi:
2122	xfrm_hash_free(net->xfrm.state_bysrc, sz);
2123out_bysrc:
2124	xfrm_hash_free(net->xfrm.state_bydst, sz);
2125out_bydst:
2126	return -ENOMEM;
2127}
2128
2129void xfrm_state_fini(struct net *net)
2130{
2131	struct xfrm_audit audit_info;
2132	unsigned int sz;
2133
2134	flush_work(&net->xfrm.state_hash_work);
2135	audit_info.loginuid = INVALID_UID;
2136	audit_info.sessionid = (unsigned int)-1;
2137	audit_info.secid = 0;
2138	xfrm_state_flush(net, IPSEC_PROTO_ANY, &audit_info);
2139	flush_work(&net->xfrm.state_gc_work);
2140
2141	WARN_ON(!list_empty(&net->xfrm.state_all));
2142
2143	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2144	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2145	xfrm_hash_free(net->xfrm.state_byspi, sz);
2146	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2147	xfrm_hash_free(net->xfrm.state_bysrc, sz);
2148	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2149	xfrm_hash_free(net->xfrm.state_bydst, sz);
2150}
2151
2152#ifdef CONFIG_AUDITSYSCALL
2153static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2154				     struct audit_buffer *audit_buf)
2155{
2156	struct xfrm_sec_ctx *ctx = x->security;
2157	u32 spi = ntohl(x->id.spi);
2158
2159	if (ctx)
2160		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2161				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2162
2163	switch (x->props.family) {
2164	case AF_INET:
2165		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2166				 &x->props.saddr.a4, &x->id.daddr.a4);
2167		break;
2168	case AF_INET6:
2169		audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2170				 x->props.saddr.a6, x->id.daddr.a6);
2171		break;
2172	}
2173
2174	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2175}
2176
2177static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2178				      struct audit_buffer *audit_buf)
2179{
2180	const struct iphdr *iph4;
2181	const struct ipv6hdr *iph6;
2182
2183	switch (family) {
2184	case AF_INET:
2185		iph4 = ip_hdr(skb);
2186		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2187				 &iph4->saddr, &iph4->daddr);
2188		break;
2189	case AF_INET6:
2190		iph6 = ipv6_hdr(skb);
2191		audit_log_format(audit_buf,
2192				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2193				 &iph6->saddr, &iph6->daddr,
2194				 iph6->flow_lbl[0] & 0x0f,
2195				 iph6->flow_lbl[1],
2196				 iph6->flow_lbl[2]);
2197		break;
2198	}
2199}
2200
2201void xfrm_audit_state_add(struct xfrm_state *x, int result,
2202			  kuid_t auid, unsigned int sessionid, u32 secid)
2203{
2204	struct audit_buffer *audit_buf;
2205
2206	audit_buf = xfrm_audit_start("SAD-add");
2207	if (audit_buf == NULL)
2208		return;
2209	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2210	xfrm_audit_helper_sainfo(x, audit_buf);
2211	audit_log_format(audit_buf, " res=%u", result);
2212	audit_log_end(audit_buf);
2213}
2214EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2215
2216void xfrm_audit_state_delete(struct xfrm_state *x, int result,
2217			     kuid_t auid, unsigned int sessionid, u32 secid)
2218{
2219	struct audit_buffer *audit_buf;
2220
2221	audit_buf = xfrm_audit_start("SAD-delete");
2222	if (audit_buf == NULL)
2223		return;
2224	xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2225	xfrm_audit_helper_sainfo(x, audit_buf);
2226	audit_log_format(audit_buf, " res=%u", result);
2227	audit_log_end(audit_buf);
2228}
2229EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2230
2231void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2232				      struct sk_buff *skb)
2233{
2234	struct audit_buffer *audit_buf;
2235	u32 spi;
2236
2237	audit_buf = xfrm_audit_start("SA-replay-overflow");
2238	if (audit_buf == NULL)
2239		return;
2240	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2241	/* don't record the sequence number because it's inherent in this kind
2242	 * of audit message */
2243	spi = ntohl(x->id.spi);
2244	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2245	audit_log_end(audit_buf);
2246}
2247EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2248
2249void xfrm_audit_state_replay(struct xfrm_state *x,
2250			     struct sk_buff *skb, __be32 net_seq)
2251{
2252	struct audit_buffer *audit_buf;
2253	u32 spi;
2254
2255	audit_buf = xfrm_audit_start("SA-replayed-pkt");
2256	if (audit_buf == NULL)
2257		return;
2258	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2259	spi = ntohl(x->id.spi);
2260	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2261			 spi, spi, ntohl(net_seq));
2262	audit_log_end(audit_buf);
2263}
2264EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2265
2266void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2267{
2268	struct audit_buffer *audit_buf;
2269
2270	audit_buf = xfrm_audit_start("SA-notfound");
2271	if (audit_buf == NULL)
2272		return;
2273	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2274	audit_log_end(audit_buf);
2275}
2276EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2277
2278void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2279			       __be32 net_spi, __be32 net_seq)
2280{
2281	struct audit_buffer *audit_buf;
2282	u32 spi;
2283
2284	audit_buf = xfrm_audit_start("SA-notfound");
2285	if (audit_buf == NULL)
2286		return;
2287	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2288	spi = ntohl(net_spi);
2289	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2290			 spi, spi, ntohl(net_seq));
2291	audit_log_end(audit_buf);
2292}
2293EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2294
2295void xfrm_audit_state_icvfail(struct xfrm_state *x,
2296			      struct sk_buff *skb, u8 proto)
2297{
2298	struct audit_buffer *audit_buf;
2299	__be32 net_spi;
2300	__be32 net_seq;
2301
2302	audit_buf = xfrm_audit_start("SA-icv-failure");
2303	if (audit_buf == NULL)
2304		return;
2305	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2306	if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2307		u32 spi = ntohl(net_spi);
2308		audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2309				 spi, spi, ntohl(net_seq));
2310	}
2311	audit_log_end(audit_buf);
2312}
2313EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2314#endif /* CONFIG_AUDITSYSCALL */
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * xfrm_state.c
   4 *
   5 * Changes:
   6 *	Mitsuru KANDA @USAGI
   7 * 	Kazunori MIYAZAWA @USAGI
   8 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   9 * 		IPv6 support
  10 * 	YOSHIFUJI Hideaki @USAGI
  11 * 		Split up af-specific functions
  12 *	Derek Atkins <derek@ihtfp.com>
  13 *		Add UDP Encapsulation
  14 *
  15 */
  16
  17#include <linux/compat.h>
  18#include <linux/workqueue.h>
  19#include <net/xfrm.h>
  20#include <linux/pfkeyv2.h>
  21#include <linux/ipsec.h>
  22#include <linux/module.h>
  23#include <linux/cache.h>
  24#include <linux/audit.h>
  25#include <linux/uaccess.h>
  26#include <linux/ktime.h>
  27#include <linux/slab.h>
  28#include <linux/interrupt.h>
  29#include <linux/kernel.h>
  30
  31#include <crypto/aead.h>
  32
  33#include "xfrm_hash.h"
  34
  /*
   * Update-side read of an RCU-protected state hash table pointer: hands
   * lockdep_is_held() on @net's xfrm_state_lock to
   * rcu_dereference_protected() as the protection condition.
   */
  35#define xfrm_state_deref_prot(table, net) \
  36	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
  37
  /* Work function bound to xfrm_state_gc_work; defined later in this file. */
  38static void xfrm_state_gc_task(struct work_struct *work);
  39
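/* Each xfrm_state may be linked into up to four hash tables
 * (see xfrm_hash_transfer()):
 *
 *   1. byspi: hashed by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
 *      Only used when the state has a non-zero SPI.
 *   2. bydst: hashed by (daddr,saddr,reqid,family) to find what SAs exist
 *      for given destination/tunnel endpoint. (output)
 *   3. bysrc: hashed by (daddr,saddr,family).
 *   4. byseq: hashed by the key manager sequence number (km.seq).
 *      Only used when km.seq is non-zero.
 */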
  46
  /* Bucket-count ceiling: xfrm_hash_grow_check() only schedules a resize
   * while the current table is smaller than this. */
  47static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
  /* Slab cache used by xfrm_state_alloc() and xfrm_state_free(). */
  48static struct kmem_cache *xfrm_state_cache __ro_after_init;
  49
  /* Deferred destruction: __xfrm_state_destroy() queues states on
   * xfrm_state_gc_list and schedules xfrm_state_gc_work to free them. */
  50static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
  51static HLIST_HEAD(xfrm_state_gc_list);
  52
  53static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
  54{
  55	return refcount_inc_not_zero(&x->refcnt);
  56}
  57
  58static inline unsigned int xfrm_dst_hash(struct net *net,
  59					 const xfrm_address_t *daddr,
  60					 const xfrm_address_t *saddr,
  61					 u32 reqid,
  62					 unsigned short family)
  63{
  64	return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
  65}
  66
  67static inline unsigned int xfrm_src_hash(struct net *net,
  68					 const xfrm_address_t *daddr,
  69					 const xfrm_address_t *saddr,
  70					 unsigned short family)
  71{
  72	return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
  73}
  74
  75static inline unsigned int
  76xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
  77	      __be32 spi, u8 proto, unsigned short family)
  78{
  79	return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
  80}
  81
  82static unsigned int xfrm_seq_hash(struct net *net, u32 seq)
  83{
  84	return __xfrm_seq_hash(seq, net->xfrm.state_hmask);
  85}
  86
  /*
   * XFRM_STATE_INSERT - link a state into one hash chain, HW states first.
   * @by:    name of the hlist_node member inside struct xfrm_state
   *         (bydst, bysrc, byspi or byseq)
   * @_n:    pointer to that node in the state being inserted
   * @_h:    chain head to insert into
   * @_type: xso.type of the state being inserted
   *
   * Packet-offload (XFRM_DEV_OFFLOAD_PACKET) states are always added at the
   * head of the chain.  Any other state is placed in front of the first
   * non-packet-offload entry, i.e. after the leading run of HW states, or at
   * the head when the chain is empty or holds HW states only.
   *
   * Wrapped in do { } while (0) so the macro expands to a single statement
   * and stays safe inside unbraced if/else; invoke it with a trailing ';'.
   */
#define XFRM_STATE_INSERT(by, _n, _h, _type)                               \
	do {                                                               \
		struct xfrm_state *_x = NULL;                              \
									   \
		if ((_type) != XFRM_DEV_OFFLOAD_PACKET) {                  \
			/* Find the first non-HW entry, if any. */         \
			hlist_for_each_entry_rcu(_x, _h, by) {             \
				if (_x->xso.type == XFRM_DEV_OFFLOAD_PACKET) \
					continue;                          \
				break;                                     \
			}                                                  \
		}                                                          \
									   \
		if (!_x || _x->xso.type == XFRM_DEV_OFFLOAD_PACKET)        \
			/* SAD is empty or consists of HW SAs only */      \
			hlist_add_head_rcu(_n, _h);                        \
		else                                                       \
			hlist_add_before_rcu(_n, &_x->by);                 \
	} while (0)
 105
 106static void xfrm_hash_transfer(struct hlist_head *list,
 107			       struct hlist_head *ndsttable,
 108			       struct hlist_head *nsrctable,
 109			       struct hlist_head *nspitable,
 110			       struct hlist_head *nseqtable,
 111			       unsigned int nhashmask)
 112{
 113	struct hlist_node *tmp;
 114	struct xfrm_state *x;
 115
 116	hlist_for_each_entry_safe(x, tmp, list, bydst) {
 117		unsigned int h;
 118
 119		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 120				    x->props.reqid, x->props.family,
 121				    nhashmask);
 122		XFRM_STATE_INSERT(bydst, &x->bydst, ndsttable + h, x->xso.type);
 123
 124		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
 125				    x->props.family,
 126				    nhashmask);
 127		XFRM_STATE_INSERT(bysrc, &x->bysrc, nsrctable + h, x->xso.type);
 128
 129		if (x->id.spi) {
 130			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 131					    x->id.proto, x->props.family,
 132					    nhashmask);
 133			XFRM_STATE_INSERT(byspi, &x->byspi, nspitable + h,
 134					  x->xso.type);
 135		}
 136
 137		if (x->km.seq) {
 138			h = __xfrm_seq_hash(x->km.seq, nhashmask);
 139			XFRM_STATE_INSERT(byseq, &x->byseq, nseqtable + h,
 140					  x->xso.type);
 141		}
 142	}
 143}
 144
 145static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
 146{
 147	return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
 148}
 149
 150static void xfrm_hash_resize(struct work_struct *work)
 151{
 152	struct net *net = container_of(work, struct net, xfrm.state_hash_work);
 153	struct hlist_head *ndst, *nsrc, *nspi, *nseq, *odst, *osrc, *ospi, *oseq;
 154	unsigned long nsize, osize;
 155	unsigned int nhashmask, ohashmask;
 156	int i;
 157
 158	nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
 159	ndst = xfrm_hash_alloc(nsize);
 160	if (!ndst)
 161		return;
 162	nsrc = xfrm_hash_alloc(nsize);
 163	if (!nsrc) {
 164		xfrm_hash_free(ndst, nsize);
 165		return;
 166	}
 167	nspi = xfrm_hash_alloc(nsize);
 168	if (!nspi) {
 169		xfrm_hash_free(ndst, nsize);
 170		xfrm_hash_free(nsrc, nsize);
 171		return;
 172	}
 173	nseq = xfrm_hash_alloc(nsize);
 174	if (!nseq) {
 175		xfrm_hash_free(ndst, nsize);
 176		xfrm_hash_free(nsrc, nsize);
 177		xfrm_hash_free(nspi, nsize);
 178		return;
 179	}
 180
 181	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 182	write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
 183
 184	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
 185	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 186	for (i = net->xfrm.state_hmask; i >= 0; i--)
 187		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nseq, nhashmask);
 188
 189	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
 190	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 191	oseq = xfrm_state_deref_prot(net->xfrm.state_byseq, net);
 192	ohashmask = net->xfrm.state_hmask;
 193
 194	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
 195	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
 196	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 197	rcu_assign_pointer(net->xfrm.state_byseq, nseq);
 198	net->xfrm.state_hmask = nhashmask;
 199
 200	write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
 201	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 202
 203	osize = (ohashmask + 1) * sizeof(struct hlist_head);
 204
 205	synchronize_rcu();
 206
 207	xfrm_hash_free(odst, osize);
 208	xfrm_hash_free(osrc, osize);
 209	xfrm_hash_free(ospi, osize);
 210	xfrm_hash_free(oseq, osize);
 211}
 212
 /* NOTE(review): presumably serializes writers of xfrm_state_afinfo[];
  * its users are outside this part of the file — confirm. */
 213static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
 /* Per-address-family state operations, indexed by family (RCU pointers). */
 214static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
 215
 /* Protects xfrm_state_gc_list (taken in xfrm_state_gc_task() and
  * __xfrm_state_destroy()). */
 216static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 217
 /* Forward declarations for functions used before their definitions. */
 218int __xfrm_state_delete(struct xfrm_state *x);
 219
 220int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 221static bool km_is_alive(const struct km_event *c);
 222void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
 223
 224int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
 225{
 226	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 227	int err = 0;
 228
 229	if (!afinfo)
 230		return -EAFNOSUPPORT;
 231
 232#define X(afi, T, name) do {			\
 233		WARN_ON((afi)->type_ ## name);	\
 234		(afi)->type_ ## name = (T);	\
 235	} while (0)
 236
 237	switch (type->proto) {
 238	case IPPROTO_COMP:
 239		X(afinfo, type, comp);
 240		break;
 241	case IPPROTO_AH:
 242		X(afinfo, type, ah);
 243		break;
 244	case IPPROTO_ESP:
 245		X(afinfo, type, esp);
 246		break;
 247	case IPPROTO_IPIP:
 248		X(afinfo, type, ipip);
 249		break;
 250	case IPPROTO_DSTOPTS:
 251		X(afinfo, type, dstopts);
 252		break;
 253	case IPPROTO_ROUTING:
 254		X(afinfo, type, routing);
 255		break;
 256	case IPPROTO_IPV6:
 257		X(afinfo, type, ipip6);
 258		break;
 259	default:
 260		WARN_ON(1);
 261		err = -EPROTONOSUPPORT;
 262		break;
 263	}
 264#undef X
 265	rcu_read_unlock();
 266	return err;
 267}
 268EXPORT_SYMBOL(xfrm_register_type);
 269
 270void xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
 271{
 272	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 273
 274	if (unlikely(afinfo == NULL))
 275		return;
 276
 277#define X(afi, T, name) do {				\
 278		WARN_ON((afi)->type_ ## name != (T));	\
 279		(afi)->type_ ## name = NULL;		\
 280	} while (0)
 281
 282	switch (type->proto) {
 283	case IPPROTO_COMP:
 284		X(afinfo, type, comp);
 285		break;
 286	case IPPROTO_AH:
 287		X(afinfo, type, ah);
 288		break;
 289	case IPPROTO_ESP:
 290		X(afinfo, type, esp);
 291		break;
 292	case IPPROTO_IPIP:
 293		X(afinfo, type, ipip);
 294		break;
 295	case IPPROTO_DSTOPTS:
 296		X(afinfo, type, dstopts);
 297		break;
 298	case IPPROTO_ROUTING:
 299		X(afinfo, type, routing);
 300		break;
 301	case IPPROTO_IPV6:
 302		X(afinfo, type, ipip6);
 303		break;
 304	default:
 305		WARN_ON(1);
 306		break;
 307	}
 308#undef X
 309	rcu_read_unlock();
 310}
 311EXPORT_SYMBOL(xfrm_unregister_type);
 312
 313static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 314{
 315	const struct xfrm_type *type = NULL;
 316	struct xfrm_state_afinfo *afinfo;
 317	int modload_attempted = 0;
 318
 319retry:
 320	afinfo = xfrm_state_get_afinfo(family);
 321	if (unlikely(afinfo == NULL))
 322		return NULL;
 323
 324	switch (proto) {
 325	case IPPROTO_COMP:
 326		type = afinfo->type_comp;
 327		break;
 328	case IPPROTO_AH:
 329		type = afinfo->type_ah;
 330		break;
 331	case IPPROTO_ESP:
 332		type = afinfo->type_esp;
 333		break;
 334	case IPPROTO_IPIP:
 335		type = afinfo->type_ipip;
 336		break;
 337	case IPPROTO_DSTOPTS:
 338		type = afinfo->type_dstopts;
 339		break;
 340	case IPPROTO_ROUTING:
 341		type = afinfo->type_routing;
 342		break;
 343	case IPPROTO_IPV6:
 344		type = afinfo->type_ipip6;
 345		break;
 346	default:
 347		break;
 348	}
 349
 350	if (unlikely(type && !try_module_get(type->owner)))
 351		type = NULL;
 352
 353	rcu_read_unlock();
 354
 355	if (!type && !modload_attempted) {
 356		request_module("xfrm-type-%d-%d", family, proto);
 357		modload_attempted = 1;
 358		goto retry;
 359	}
 360
 361	return type;
 362}
 363
 364static void xfrm_put_type(const struct xfrm_type *type)
 365{
 366	module_put(type->owner);
 367}
 368
 369int xfrm_register_type_offload(const struct xfrm_type_offload *type,
 370			       unsigned short family)
 371{
 372	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 373	int err = 0;
 374
 375	if (unlikely(afinfo == NULL))
 376		return -EAFNOSUPPORT;
 377
 378	switch (type->proto) {
 379	case IPPROTO_ESP:
 380		WARN_ON(afinfo->type_offload_esp);
 381		afinfo->type_offload_esp = type;
 382		break;
 383	default:
 384		WARN_ON(1);
 385		err = -EPROTONOSUPPORT;
 386		break;
 387	}
 388
 389	rcu_read_unlock();
 390	return err;
 391}
 392EXPORT_SYMBOL(xfrm_register_type_offload);
 393
 394void xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
 395				  unsigned short family)
 396{
 397	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
 398
 399	if (unlikely(afinfo == NULL))
 400		return;
 401
 402	switch (type->proto) {
 403	case IPPROTO_ESP:
 404		WARN_ON(afinfo->type_offload_esp != type);
 405		afinfo->type_offload_esp = NULL;
 406		break;
 407	default:
 408		WARN_ON(1);
 409		break;
 410	}
 411	rcu_read_unlock();
 412}
 413EXPORT_SYMBOL(xfrm_unregister_type_offload);
 414
 415static const struct xfrm_type_offload *
 416xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
 417{
 418	const struct xfrm_type_offload *type = NULL;
 419	struct xfrm_state_afinfo *afinfo;
 420
 421retry:
 422	afinfo = xfrm_state_get_afinfo(family);
 423	if (unlikely(afinfo == NULL))
 424		return NULL;
 425
 426	switch (proto) {
 427	case IPPROTO_ESP:
 428		type = afinfo->type_offload_esp;
 429		break;
 430	default:
 431		break;
 432	}
 433
 434	if ((type && !try_module_get(type->owner)))
 435		type = NULL;
 436
 437	rcu_read_unlock();
 438
 439	if (!type && try_load) {
 440		request_module("xfrm-offload-%d-%d", family, proto);
 441		try_load = false;
 442		goto retry;
 443	}
 444
 445	return type;
 446}
 447
 448static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
 449{
 450	module_put(type->owner);
 451}
 452
 453static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
 454	[XFRM_MODE_BEET] = {
 455		.encap = XFRM_MODE_BEET,
 456		.flags = XFRM_MODE_FLAG_TUNNEL,
 457		.family = AF_INET,
 458	},
 459	[XFRM_MODE_TRANSPORT] = {
 460		.encap = XFRM_MODE_TRANSPORT,
 461		.family = AF_INET,
 462	},
 463	[XFRM_MODE_TUNNEL] = {
 464		.encap = XFRM_MODE_TUNNEL,
 465		.flags = XFRM_MODE_FLAG_TUNNEL,
 466		.family = AF_INET,
 467	},
 468};
 469
 470static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
 471	[XFRM_MODE_BEET] = {
 472		.encap = XFRM_MODE_BEET,
 473		.flags = XFRM_MODE_FLAG_TUNNEL,
 474		.family = AF_INET6,
 475	},
 476	[XFRM_MODE_ROUTEOPTIMIZATION] = {
 477		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
 478		.family = AF_INET6,
 479	},
 480	[XFRM_MODE_TRANSPORT] = {
 481		.encap = XFRM_MODE_TRANSPORT,
 482		.family = AF_INET6,
 483	},
 484	[XFRM_MODE_TUNNEL] = {
 485		.encap = XFRM_MODE_TUNNEL,
 486		.flags = XFRM_MODE_FLAG_TUNNEL,
 487		.family = AF_INET6,
 488	},
 489};
 490
 491static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 492{
 493	const struct xfrm_mode *mode;
 494
 495	if (unlikely(encap >= XFRM_MODE_MAX))
 496		return NULL;
 497
 498	switch (family) {
 499	case AF_INET:
 500		mode = &xfrm4_mode_map[encap];
 501		if (mode->family == family)
 502			return mode;
 503		break;
 504	case AF_INET6:
 505		mode = &xfrm6_mode_map[encap];
 506		if (mode->family == family)
 507			return mode;
 508		break;
 509	default:
 510		break;
 511	}
 512
 513	return NULL;
 514}
 515
 516void xfrm_state_free(struct xfrm_state *x)
 517{
 518	kmem_cache_free(xfrm_state_cache, x);
 519}
 520EXPORT_SYMBOL(xfrm_state_free);
 521
 522static void ___xfrm_state_destroy(struct xfrm_state *x)
 523{
 524	hrtimer_cancel(&x->mtimer);
 525	del_timer_sync(&x->rtimer);
 526	kfree(x->aead);
 527	kfree(x->aalg);
 528	kfree(x->ealg);
 529	kfree(x->calg);
 530	kfree(x->encap);
 531	kfree(x->coaddr);
 532	kfree(x->replay_esn);
 533	kfree(x->preplay_esn);
 534	if (x->type_offload)
 535		xfrm_put_type_offload(x->type_offload);
 536	if (x->type) {
 537		x->type->destructor(x);
 538		xfrm_put_type(x->type);
 539	}
 540	if (x->xfrag.page)
 541		put_page(x->xfrag.page);
 542	xfrm_dev_state_free(x);
 543	security_xfrm_state_free(x);
 544	xfrm_state_free(x);
 545}
 546
 547static void xfrm_state_gc_task(struct work_struct *work)
 548{
 549	struct xfrm_state *x;
 550	struct hlist_node *tmp;
 551	struct hlist_head gc_list;
 552
 553	spin_lock_bh(&xfrm_state_gc_lock);
 554	hlist_move_list(&xfrm_state_gc_list, &gc_list);
 555	spin_unlock_bh(&xfrm_state_gc_lock);
 556
 557	synchronize_rcu();
 558
 559	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 560		___xfrm_state_destroy(x);
 561}
 562
 563static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 564{
 565	struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer);
 566	enum hrtimer_restart ret = HRTIMER_NORESTART;
 567	time64_t now = ktime_get_real_seconds();
 568	time64_t next = TIME64_MAX;
 569	int warn = 0;
 570	int err = 0;
 571
 572	spin_lock(&x->lock);
 573	xfrm_dev_state_update_curlft(x);
 574
 575	if (x->km.state == XFRM_STATE_DEAD)
 576		goto out;
 577	if (x->km.state == XFRM_STATE_EXPIRED)
 578		goto expired;
 579	if (x->lft.hard_add_expires_seconds) {
 580		time64_t tmo = x->lft.hard_add_expires_seconds +
 581			x->curlft.add_time - now;
 582		if (tmo <= 0) {
 583			if (x->xflags & XFRM_SOFT_EXPIRE) {
 584				/* enter hard expire without soft expire first?!
 585				 * setting a new date could trigger this.
 586				 * workaround: fix x->curflt.add_time by below:
 587				 */
 588				x->curlft.add_time = now - x->saved_tmo - 1;
 589				tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
 590			} else
 591				goto expired;
 592		}
 593		if (tmo < next)
 594			next = tmo;
 595	}
 596	if (x->lft.hard_use_expires_seconds) {
 597		time64_t tmo = x->lft.hard_use_expires_seconds +
 598			(READ_ONCE(x->curlft.use_time) ? : now) - now;
 599		if (tmo <= 0)
 600			goto expired;
 601		if (tmo < next)
 602			next = tmo;
 603	}
 604	if (x->km.dying)
 605		goto resched;
 606	if (x->lft.soft_add_expires_seconds) {
 607		time64_t tmo = x->lft.soft_add_expires_seconds +
 608			x->curlft.add_time - now;
 609		if (tmo <= 0) {
 610			warn = 1;
 611			x->xflags &= ~XFRM_SOFT_EXPIRE;
 612		} else if (tmo < next) {
 613			next = tmo;
 614			x->xflags |= XFRM_SOFT_EXPIRE;
 615			x->saved_tmo = tmo;
 616		}
 617	}
 618	if (x->lft.soft_use_expires_seconds) {
 619		time64_t tmo = x->lft.soft_use_expires_seconds +
 620			(READ_ONCE(x->curlft.use_time) ? : now) - now;
 621		if (tmo <= 0)
 622			warn = 1;
 623		else if (tmo < next)
 624			next = tmo;
 625	}
 626
 627	x->km.dying = warn;
 628	if (warn)
 629		km_state_expired(x, 0, 0);
 630resched:
 631	if (next != TIME64_MAX) {
 632		hrtimer_forward_now(&x->mtimer, ktime_set(next, 0));
 633		ret = HRTIMER_RESTART;
 634	}
 635
 636	goto out;
 637
 638expired:
 639	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
 640		x->km.state = XFRM_STATE_EXPIRED;
 641
 642	err = __xfrm_state_delete(x);
 643	if (!err)
 644		km_state_expired(x, 1, 0);
 645
 646	xfrm_audit_state_delete(x, err ? 0 : 1, true);
 647
 648out:
 649	spin_unlock(&x->lock);
 650	return ret;
 651}
 652
 653static void xfrm_replay_timer_handler(struct timer_list *t);
 654
 655struct xfrm_state *xfrm_state_alloc(struct net *net)
 656{
 657	struct xfrm_state *x;
 658
 659	x = kmem_cache_zalloc(xfrm_state_cache, GFP_ATOMIC);
 660
 661	if (x) {
 662		write_pnet(&x->xs_net, net);
 663		refcount_set(&x->refcnt, 1);
 664		atomic_set(&x->tunnel_users, 0);
 665		INIT_LIST_HEAD(&x->km.all);
 666		INIT_HLIST_NODE(&x->bydst);
 667		INIT_HLIST_NODE(&x->bysrc);
 668		INIT_HLIST_NODE(&x->byspi);
 669		INIT_HLIST_NODE(&x->byseq);
 670		hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT);
 671		x->mtimer.function = xfrm_timer_handler;
 672		timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
 673		x->curlft.add_time = ktime_get_real_seconds();
 674		x->lft.soft_byte_limit = XFRM_INF;
 675		x->lft.soft_packet_limit = XFRM_INF;
 676		x->lft.hard_byte_limit = XFRM_INF;
 677		x->lft.hard_packet_limit = XFRM_INF;
 678		x->replay_maxage = 0;
 679		x->replay_maxdiff = 0;
 680		spin_lock_init(&x->lock);
 681	}
 682	return x;
 683}
 684EXPORT_SYMBOL(xfrm_state_alloc);
 685
 686void __xfrm_state_destroy(struct xfrm_state *x, bool sync)
 687{
 688	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 689
 690	if (sync) {
 691		synchronize_rcu();
 692		___xfrm_state_destroy(x);
 693	} else {
 694		spin_lock_bh(&xfrm_state_gc_lock);
 695		hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 696		spin_unlock_bh(&xfrm_state_gc_lock);
 697		schedule_work(&xfrm_state_gc_work);
 698	}
 699}
 700EXPORT_SYMBOL(__xfrm_state_destroy);
 701
 702int __xfrm_state_delete(struct xfrm_state *x)
 703{
 704	struct net *net = xs_net(x);
 705	int err = -ESRCH;
 706
 707	if (x->km.state != XFRM_STATE_DEAD) {
 708		x->km.state = XFRM_STATE_DEAD;
 709		spin_lock(&net->xfrm.xfrm_state_lock);
 710		list_del(&x->km.all);
 711		hlist_del_rcu(&x->bydst);
 712		hlist_del_rcu(&x->bysrc);
 713		if (x->km.seq)
 714			hlist_del_rcu(&x->byseq);
 715		if (x->id.spi)
 716			hlist_del_rcu(&x->byspi);
 717		net->xfrm.state_num--;
 718		spin_unlock(&net->xfrm.xfrm_state_lock);
 719
 720		if (x->encap_sk)
 721			sock_put(rcu_dereference_raw(x->encap_sk));
 722
 723		xfrm_dev_state_delete(x);
 724
 725		/* All xfrm_state objects are created by xfrm_state_alloc.
 726		 * The xfrm_state_alloc call gives a reference, and that
 727		 * is what we are dropping here.
 728		 */
 729		xfrm_state_put(x);
 730		err = 0;
 731	}
 732
 733	return err;
 734}
 735EXPORT_SYMBOL(__xfrm_state_delete);
 736
 737int xfrm_state_delete(struct xfrm_state *x)
 738{
 739	int err;
 740
 741	spin_lock_bh(&x->lock);
 742	err = __xfrm_state_delete(x);
 743	spin_unlock_bh(&x->lock);
 744
 745	return err;
 746}
 747EXPORT_SYMBOL(xfrm_state_delete);
 748
 749#ifdef CONFIG_SECURITY_NETWORK_XFRM
 750static inline int
 751xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 752{
 753	int i, err = 0;
 754
 755	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 756		struct xfrm_state *x;
 757
 758		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 759			if (xfrm_id_proto_match(x->id.proto, proto) &&
 760			   (err = security_xfrm_state_delete(x)) != 0) {
 761				xfrm_audit_state_delete(x, 0, task_valid);
 762				return err;
 763			}
 764		}
 765	}
 766
 767	return err;
 768}
 769
 770static inline int
 771xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 772{
 773	int i, err = 0;
 774
 775	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 776		struct xfrm_state *x;
 777		struct xfrm_dev_offload *xso;
 778
 779		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 780			xso = &x->xso;
 781
 782			if (xso->dev == dev &&
 783			   (err = security_xfrm_state_delete(x)) != 0) {
 784				xfrm_audit_state_delete(x, 0, task_valid);
 785				return err;
 786			}
 787		}
 788	}
 789
 790	return err;
 791}
 792#else
 793static inline int
 794xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
 795{
 796	return 0;
 797}
 798
 799static inline int
 800xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
 801{
 802	return 0;
 803}
 804#endif
 805
 806int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync)
 807{
 808	int i, err = 0, cnt = 0;
 809
 810	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 811	err = xfrm_state_flush_secctx_check(net, proto, task_valid);
 812	if (err)
 813		goto out;
 814
 815	err = -ESRCH;
 816	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 817		struct xfrm_state *x;
 818restart:
 819		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 820			if (!xfrm_state_kern(x) &&
 821			    xfrm_id_proto_match(x->id.proto, proto)) {
 822				xfrm_state_hold(x);
 823				spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 824
 825				err = xfrm_state_delete(x);
 826				xfrm_audit_state_delete(x, err ? 0 : 1,
 827							task_valid);
 828				if (sync)
 829					xfrm_state_put_sync(x);
 830				else
 831					xfrm_state_put(x);
 832				if (!err)
 833					cnt++;
 834
 835				spin_lock_bh(&net->xfrm.xfrm_state_lock);
 836				goto restart;
 837			}
 838		}
 839	}
 840out:
 841	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 842	if (cnt)
 843		err = 0;
 844
 845	return err;
 846}
 847EXPORT_SYMBOL(xfrm_state_flush);
 848
 849int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
 850{
 851	int i, err = 0, cnt = 0;
 852
 853	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 854	err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
 855	if (err)
 856		goto out;
 857
 858	err = -ESRCH;
 859	for (i = 0; i <= net->xfrm.state_hmask; i++) {
 860		struct xfrm_state *x;
 861		struct xfrm_dev_offload *xso;
 862restart:
 863		hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
 864			xso = &x->xso;
 865
 866			if (!xfrm_state_kern(x) && xso->dev == dev) {
 867				xfrm_state_hold(x);
 868				spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 869
 870				err = xfrm_state_delete(x);
 871				xfrm_audit_state_delete(x, err ? 0 : 1,
 872							task_valid);
 873				xfrm_state_put(x);
 874				if (!err)
 875					cnt++;
 876
 877				spin_lock_bh(&net->xfrm.xfrm_state_lock);
 878				goto restart;
 879			}
 880		}
 881	}
 882	if (cnt)
 883		err = 0;
 884
 885out:
 886	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 887	return err;
 888}
 889EXPORT_SYMBOL(xfrm_dev_state_flush);
 890
 891void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
 892{
 893	spin_lock_bh(&net->xfrm.xfrm_state_lock);
 894	si->sadcnt = net->xfrm.state_num;
 895	si->sadhcnt = net->xfrm.state_hmask + 1;
 896	si->sadhmcnt = xfrm_state_hashmax;
 897	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 898}
 899EXPORT_SYMBOL(xfrm_sad_getinfo);
 900
 901static void
 902__xfrm4_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 903{
 904	const struct flowi4 *fl4 = &fl->u.ip4;
 905
 906	sel->daddr.a4 = fl4->daddr;
 907	sel->saddr.a4 = fl4->saddr;
 908	sel->dport = xfrm_flowi_dport(fl, &fl4->uli);
 909	sel->dport_mask = htons(0xffff);
 910	sel->sport = xfrm_flowi_sport(fl, &fl4->uli);
 911	sel->sport_mask = htons(0xffff);
 912	sel->family = AF_INET;
 913	sel->prefixlen_d = 32;
 914	sel->prefixlen_s = 32;
 915	sel->proto = fl4->flowi4_proto;
 916	sel->ifindex = fl4->flowi4_oif;
 917}
 918
 919static void
 920__xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 921{
 922	const struct flowi6 *fl6 = &fl->u.ip6;
 923
 924	/* Initialize temporary selector matching only to current session. */
 925	*(struct in6_addr *)&sel->daddr = fl6->daddr;
 926	*(struct in6_addr *)&sel->saddr = fl6->saddr;
 927	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
 928	sel->dport_mask = htons(0xffff);
 929	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
 930	sel->sport_mask = htons(0xffff);
 931	sel->family = AF_INET6;
 932	sel->prefixlen_d = 128;
 933	sel->prefixlen_s = 128;
 934	sel->proto = fl6->flowi6_proto;
 935	sel->ifindex = fl6->flowi6_oif;
 936}
 937
 938static void
 939xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
 940		    const struct xfrm_tmpl *tmpl,
 941		    const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 942		    unsigned short family)
 943{
 944	switch (family) {
 945	case AF_INET:
 946		__xfrm4_init_tempsel(&x->sel, fl);
 947		break;
 948	case AF_INET6:
 949		__xfrm6_init_tempsel(&x->sel, fl);
 950		break;
 951	}
 952
 953	x->id = tmpl->id;
 954
 955	switch (tmpl->encap_family) {
 956	case AF_INET:
 957		if (x->id.daddr.a4 == 0)
 958			x->id.daddr.a4 = daddr->a4;
 959		x->props.saddr = tmpl->saddr;
 960		if (x->props.saddr.a4 == 0)
 961			x->props.saddr.a4 = saddr->a4;
 962		break;
 963	case AF_INET6:
 964		if (ipv6_addr_any((struct in6_addr *)&x->id.daddr))
 965			memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
 966		memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
 967		if (ipv6_addr_any((struct in6_addr *)&x->props.saddr))
 968			memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
 969		break;
 970	}
 971
 972	x->props.mode = tmpl->mode;
 973	x->props.reqid = tmpl->reqid;
 974	x->props.family = tmpl->encap_family;
 975}
 976
 977static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark,
 978						  const xfrm_address_t *daddr,
 979						  __be32 spi, u8 proto,
 980						  unsigned short family,
 981						  struct xfrm_dev_offload *xdo)
 982{
 983	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 984	struct xfrm_state *x;
 985
 986	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 987#ifdef CONFIG_XFRM_OFFLOAD
 988		if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) {
 989			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
 990				/* HW states are in the head of list, there is
 991				 * no need to iterate further.
 992				 */
 993				break;
 994
 995			/* Packet offload: both policy and SA should
 996			 * have same device.
 997			 */
 998			if (xdo->dev != x->xso.dev)
 999				continue;
1000		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
1001			/* Skip HW policy for SW lookups */
1002			continue;
1003#endif
1004		if (x->props.family != family ||
1005		    x->id.spi       != spi ||
1006		    x->id.proto     != proto ||
1007		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
1008			continue;
1009
1010		if ((mark & x->mark.m) != x->mark.v)
1011			continue;
1012		if (!xfrm_state_hold_rcu(x))
1013			continue;
1014		return x;
1015	}
1016
1017	return NULL;
1018}
1019
1020static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
1021					      const xfrm_address_t *daddr,
1022					      __be32 spi, u8 proto,
1023					      unsigned short family)
1024{
1025	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
1026	struct xfrm_state *x;
1027
1028	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
1029		if (x->props.family != family ||
1030		    x->id.spi       != spi ||
1031		    x->id.proto     != proto ||
1032		    !xfrm_addr_equal(&x->id.daddr, daddr, family))
1033			continue;
1034
1035		if ((mark & x->mark.m) != x->mark.v)
1036			continue;
1037		if (!xfrm_state_hold_rcu(x))
1038			continue;
1039		return x;
1040	}
1041
1042	return NULL;
1043}
1044
1045static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1046						     const xfrm_address_t *daddr,
1047						     const xfrm_address_t *saddr,
1048						     u8 proto, unsigned short family)
1049{
1050	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
1051	struct xfrm_state *x;
1052
1053	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
1054		if (x->props.family != family ||
1055		    x->id.proto     != proto ||
1056		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1057		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
1058			continue;
1059
1060		if ((mark & x->mark.m) != x->mark.v)
1061			continue;
1062		if (!xfrm_state_hold_rcu(x))
1063			continue;
1064		return x;
1065	}
1066
1067	return NULL;
1068}
1069
1070static inline struct xfrm_state *
1071__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
1072{
1073	struct net *net = xs_net(x);
1074	u32 mark = x->mark.v & x->mark.m;
1075
1076	if (use_spi)
1077		return __xfrm_state_lookup(net, mark, &x->id.daddr,
1078					   x->id.spi, x->id.proto, family);
1079	else
1080		return __xfrm_state_lookup_byaddr(net, mark,
1081						  &x->id.daddr,
1082						  &x->props.saddr,
1083						  x->id.proto, family);
1084}
1085
1086static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
1087{
1088	if (have_hash_collision &&
1089	    (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
1090	    net->xfrm.state_num > net->xfrm.state_hmask)
1091		schedule_work(&net->xfrm.state_hash_work);
1092}
1093
1094static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
1095			       const struct flowi *fl, unsigned short family,
1096			       struct xfrm_state **best, int *acq_in_progress,
1097			       int *error)
1098{
1099	/* Resolution logic:
1100	 * 1. There is a valid state with matching selector. Done.
1101	 * 2. Valid state with inappropriate selector. Skip.
1102	 *
1103	 * Entering area of "sysdeps".
1104	 *
1105	 * 3. If state is not valid, selector is temporary, it selects
1106	 *    only session which triggered previous resolution. Key
1107	 *    manager will do something to install a state with proper
1108	 *    selector.
1109	 */
1110	if (x->km.state == XFRM_STATE_VALID) {
1111		if ((x->sel.family &&
1112		     (x->sel.family != family ||
1113		      !xfrm_selector_match(&x->sel, fl, family))) ||
1114		    !security_xfrm_state_pol_flow_match(x, pol,
1115							&fl->u.__fl_common))
1116			return;
1117
1118		if (!*best ||
1119		    (*best)->km.dying > x->km.dying ||
1120		    ((*best)->km.dying == x->km.dying &&
1121		     (*best)->curlft.add_time < x->curlft.add_time))
1122			*best = x;
1123	} else if (x->km.state == XFRM_STATE_ACQ) {
1124		*acq_in_progress = 1;
1125	} else if (x->km.state == XFRM_STATE_ERROR ||
1126		   x->km.state == XFRM_STATE_EXPIRED) {
1127		if ((!x->sel.family ||
1128		     (x->sel.family == family &&
1129		      xfrm_selector_match(&x->sel, fl, family))) &&
1130		    security_xfrm_state_pol_flow_match(x, pol,
1131						       &fl->u.__fl_common))
1132			*error = -ESRCH;
1133	}
1134}
1135
/*
 * xfrm_state_find - find, or start acquiring, the state for one template
 *
 * Scans the state_bydst bucket for (daddr, saddr, tmpl->reqid, encap_family)
 * and, if that yields neither a best state nor an acquire in progress, the
 * bucket hashed with an all-zero source address.  Every candidate matching
 * the template, mark and if_id is handed to xfrm_state_look_at().
 *
 * When nothing usable was found, no error was recorded and no acquire is in
 * progress, a temporary state is allocated and km_query() is issued; on
 * success that state is linked into the tables as XFRM_STATE_ACQ with a
 * timer of net->xfrm.sysctl_acq_expires seconds and is what gets returned.
 *
 * Returns a state with a reference held, or NULL with *err set to one of
 * -EAGAIN, -EEXIST, -ESRCH, -ENOMEM or the error reported by the security or
 * offload hook.  *err is also forced to -EAGAIN (and the result dropped) when
 * the hash generation sequence changed during the call.
 */
1136struct xfrm_state *
1137xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1138		const struct flowi *fl, struct xfrm_tmpl *tmpl,
1139		struct xfrm_policy *pol, int *err,
1140		unsigned short family, u32 if_id)
1141{
1142	static xfrm_address_t saddr_wildcard = { };
1143	struct net *net = xp_net(pol);
1144	unsigned int h, h_wildcard;
1145	struct xfrm_state *x, *x0, *to_put;
1146	int acquire_in_progress = 0;
1147	int error = 0;
1148	struct xfrm_state *best = NULL;
1149	u32 mark = pol->mark.v & pol->mark.m;
1150	unsigned short encap_family = tmpl->encap_family;
1151	unsigned int sequence;
1152	struct km_event c;
1153
1154	to_put = NULL;
1155
	/* Snapshot the generation; rechecked with read_seqcount_retry() below. */
1156	sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
1157
1158	rcu_read_lock();
	/* Pass 1: bucket keyed on the exact (daddr, saddr) pair. */
1159	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
1160	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
1161#ifdef CONFIG_XFRM_OFFLOAD
1162		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
1163			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1164				/* HW states are in the head of list, there is
1165				 * no need to iterate further.
1166				 */
1167				break;
1168
1169			/* Packet offload: both policy and SA should
1170			 * have same device.
1171			 */
1172			if (pol->xdo.dev != x->xso.dev)
1173				continue;
1174		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
1175			/* Skip HW policy for SW lookups */
1176			continue;
1177#endif
1178		if (x->props.family == encap_family &&
1179		    x->props.reqid == tmpl->reqid &&
1180		    (mark & x->mark.m) == x->mark.v &&
1181		    x->if_id == if_id &&
1182		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
1183		    xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
1184		    tmpl->mode == x->props.mode &&
1185		    tmpl->id.proto == x->id.proto &&
1186		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
1187			xfrm_state_look_at(pol, x, fl, family,
1188					   &best, &acquire_in_progress, &error);
1189	}
1190	if (best || acquire_in_progress)
1191		goto found;
1192
	/* Pass 2: bucket keyed on the all-zero source address; unlike pass 1
	 * only the destination address of the candidate is compared.
	 */
1193	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
1194	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
1195#ifdef CONFIG_XFRM_OFFLOAD
1196		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
1197			if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
1198				/* HW states are in the head of list, there is
1199				 * no need to iterate further.
1200				 */
1201				break;
1202
1203			/* Packet offload: both policy and SA should
1204			 * have same device.
1205			 */
1206			if (pol->xdo.dev != x->xso.dev)
1207				continue;
1208		} else if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
1209			/* Skip HW policy for SW lookups */
1210			continue;
1211#endif
1212		if (x->props.family == encap_family &&
1213		    x->props.reqid == tmpl->reqid &&
1214		    (mark & x->mark.m) == x->mark.v &&
1215		    x->if_id == if_id &&
1216		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
1217		    xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
1218		    tmpl->mode == x->props.mode &&
1219		    tmpl->id.proto == x->id.proto &&
1220		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
1221			xfrm_state_look_at(pol, x, fl, family,
1222					   &best, &acquire_in_progress, &error);
1223	}
1224
1225found:
1226	x = best;
	/* Nothing usable, no error, no acquire pending: try to start one. */
1227	if (!x && !error && !acquire_in_progress) {
		/* The template names an SPI and the by-SPI lookup finds a
		 * state for it: report -EEXIST instead of acquiring.
		 */
1228		if (tmpl->id.spi &&
1229		    (x0 = __xfrm_state_lookup_all(net, mark, daddr,
1230						  tmpl->id.spi, tmpl->id.proto,
1231						  encap_family,
1232						  &pol->xdo)) != NULL) {
1233			to_put = x0;
1234			error = -EEXIST;
1235			goto out;
1236		}
1237
1238		c.net = net;
1239		/* If the KMs have no listeners (yet...), avoid allocating an SA
1240		 * for each and every packet - garbage collection might not
1241		 * handle the flood.
1242		 */
1243		if (!km_is_alive(&c)) {
1244			error = -ESRCH;
1245			goto out;
1246		}
1247
1248		x = xfrm_state_alloc(net);
1249		if (x == NULL) {
1250			error = -ENOMEM;
1251			goto out;
1252		}
1253		/* Initialize temporary state matching only
1254		 * to current session. */
1255		xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
1256		memcpy(&x->mark, &pol->mark, sizeof(x->mark));
1257		x->if_id = if_id;
1258
1259		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
1260		if (error) {
			/* Mark DEAD and release after leaving the RCU section. */
1261			x->km.state = XFRM_STATE_DEAD;
1262			to_put = x;
1263			x = NULL;
1264			goto out;
1265		}
1266#ifdef CONFIG_XFRM_OFFLOAD
		/* Packet-offload policy: copy the policy's offload settings
		 * into the temporary state and hand it to the driver.
		 */
1267		if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) {
1268			struct xfrm_dev_offload *xdo = &pol->xdo;
1269			struct xfrm_dev_offload *xso = &x->xso;
1270
1271			xso->type = XFRM_DEV_OFFLOAD_PACKET;
1272			xso->dir = xdo->dir;
1273			xso->dev = xdo->dev;
1274			xso->real_dev = xdo->real_dev;
1275			netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
1276					     GFP_ATOMIC);
1277			error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
1278			if (error) {
				/* Driver refused: undo the fields set above. */
1279				xso->dir = 0;
1280				netdev_put(xso->dev, &xso->dev_tracker);
1281				xso->dev = NULL;
1282				xso->real_dev = NULL;
1283				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
1284				x->km.state = XFRM_STATE_DEAD;
1285				to_put = x;
1286				x = NULL;
1287				goto out;
1288			}
1289		}
1290#endif
1291		if (km_query(x, tmpl, pol) == 0) {
			/* Query accepted: publish x as XFRM_STATE_ACQ.  h still
			 * holds the bydst bucket computed for pass 1.
			 */
1292			spin_lock_bh(&net->xfrm.xfrm_state_lock);
1293			x->km.state = XFRM_STATE_ACQ;
1294			list_add(&x->km.all, &net->xfrm.state_all);
1295			XFRM_STATE_INSERT(bydst, &x->bydst,
1296					  net->xfrm.state_bydst + h,
1297					  x->xso.type);
1298			h = xfrm_src_hash(net, daddr, saddr, encap_family);
1299			XFRM_STATE_INSERT(bysrc, &x->bysrc,
1300					  net->xfrm.state_bysrc + h,
1301					  x->xso.type);
1302			if (x->id.spi) {
1303				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
1304				XFRM_STATE_INSERT(byspi, &x->byspi,
1305						  net->xfrm.state_byspi + h,
1306						  x->xso.type);
1307			}
1308			if (x->km.seq) {
1309				h = xfrm_seq_hash(net, x->km.seq);
1310				XFRM_STATE_INSERT(byseq, &x->byseq,
1311						  net->xfrm.state_byseq + h,
1312						  x->xso.type);
1313			}
1314			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1315			hrtimer_start(&x->mtimer,
1316				      ktime_set(net->xfrm.sysctl_acq_expires, 0),
1317				      HRTIMER_MODE_REL_SOFT);
1318			net->xfrm.state_num++;
1319			xfrm_hash_grow_check(net, x->bydst.next != NULL);
1320			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1321		} else {
			/* km_query() failed: undo any offload setup, discard x. */
1322#ifdef CONFIG_XFRM_OFFLOAD
1323			struct xfrm_dev_offload *xso = &x->xso;
1324
1325			if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
1326				xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
1327				xso->dir = 0;
1328				netdev_put(xso->dev, &xso->dev_tracker);
1329				xso->dev = NULL;
1330				xso->real_dev = NULL;
1331				xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
1332			}
1333#endif
1334			x->km.state = XFRM_STATE_DEAD;
1335			to_put = x;
1336			x = NULL;
1337			error = -ESRCH;
1338		}
1339	}
1340out:
	/* Take the caller's reference before leaving the RCU read section;
	 * a failed hold is reported as -EAGAIN.
	 */
1341	if (x) {
1342		if (!xfrm_state_hold_rcu(x)) {
1343			*err = -EAGAIN;
1344			x = NULL;
1345		}
1346	} else {
1347		*err = acquire_in_progress ? -EAGAIN : error;
1348	}
1349	rcu_read_unlock();
1350	if (to_put)
1351		xfrm_state_put(to_put);
1352
	/* The generation changed since the snapshot: drop the result and
	 * report -EAGAIN.
	 */
1353	if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
1354		*err = -EAGAIN;
1355		if (x) {
1356			xfrm_state_put(x);
1357			x = NULL;
1358		}
1359	}
1360
1361	return x;
1362}
1363
1364struct xfrm_state *
1365xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
1366		    xfrm_address_t *daddr, xfrm_address_t *saddr,
1367		    unsigned short family, u8 mode, u8 proto, u32 reqid)
1368{
1369	unsigned int h;
1370	struct xfrm_state *rx = NULL, *x = NULL;
1371
1372	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1373	h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1374	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1375		if (x->props.family == family &&
1376		    x->props.reqid == reqid &&
1377		    (mark & x->mark.m) == x->mark.v &&
1378		    x->if_id == if_id &&
1379		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
1380		    xfrm_state_addr_check(x, daddr, saddr, family) &&
1381		    mode == x->props.mode &&
1382		    proto == x->id.proto &&
1383		    x->km.state == XFRM_STATE_VALID) {
1384			rx = x;
1385			break;
1386		}
1387	}
1388
1389	if (rx)
1390		xfrm_state_hold(rx);
1391	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1392
1393
1394	return rx;
1395}
1396EXPORT_SYMBOL(xfrm_stateonly_find);
1397
1398struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1399					      unsigned short family)
1400{
1401	struct xfrm_state *x;
1402	struct xfrm_state_walk *w;
1403
1404	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1405	list_for_each_entry(w, &net->xfrm.state_all, all) {
1406		x = container_of(w, struct xfrm_state, km);
1407		if (x->props.family != family ||
1408			x->id.spi != spi)
1409			continue;
1410
1411		xfrm_state_hold(x);
1412		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1413		return x;
1414	}
1415	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1416	return NULL;
1417}
1418EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1419
1420static void __xfrm_state_insert(struct xfrm_state *x)
1421{
1422	struct net *net = xs_net(x);
1423	unsigned int h;
1424
1425	list_add(&x->km.all, &net->xfrm.state_all);
1426
1427	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1428			  x->props.reqid, x->props.family);
1429	XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
1430			  x->xso.type);
1431
1432	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1433	XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
1434			  x->xso.type);
1435
1436	if (x->id.spi) {
1437		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1438				  x->props.family);
1439
1440		XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
1441				  x->xso.type);
1442	}
1443
1444	if (x->km.seq) {
1445		h = xfrm_seq_hash(net, x->km.seq);
1446
1447		XFRM_STATE_INSERT(byseq, &x->byseq, net->xfrm.state_byseq + h,
1448				  x->xso.type);
1449	}
1450
1451	hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT);
1452	if (x->replay_maxage)
1453		mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1454
1455	net->xfrm.state_num++;
1456
1457	xfrm_hash_grow_check(net, x->bydst.next != NULL);
1458}
1459
1460/* net->xfrm.xfrm_state_lock is held */
1461static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1462{
1463	struct net *net = xs_net(xnew);
1464	unsigned short family = xnew->props.family;
1465	u32 reqid = xnew->props.reqid;
1466	struct xfrm_state *x;
1467	unsigned int h;
1468	u32 mark = xnew->mark.v & xnew->mark.m;
1469	u32 if_id = xnew->if_id;
1470
1471	h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1472	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1473		if (x->props.family	== family &&
1474		    x->props.reqid	== reqid &&
1475		    x->if_id		== if_id &&
1476		    (mark & x->mark.m) == x->mark.v &&
1477		    xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1478		    xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1479			x->genid++;
1480	}
1481}
1482
1483void xfrm_state_insert(struct xfrm_state *x)
1484{
1485	struct net *net = xs_net(x);
1486
1487	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1488	__xfrm_state_bump_genids(x);
1489	__xfrm_state_insert(x);
1490	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1491}
1492EXPORT_SYMBOL(xfrm_state_insert);
1493
1494/* net->xfrm.xfrm_state_lock is held */
1495static struct xfrm_state *__find_acq_core(struct net *net,
1496					  const struct xfrm_mark *m,
1497					  unsigned short family, u8 mode,
1498					  u32 reqid, u32 if_id, u8 proto,
1499					  const xfrm_address_t *daddr,
1500					  const xfrm_address_t *saddr,
1501					  int create)
1502{
1503	unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1504	struct xfrm_state *x;
1505	u32 mark = m->v & m->m;
1506
1507	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1508		if (x->props.reqid  != reqid ||
1509		    x->props.mode   != mode ||
1510		    x->props.family != family ||
1511		    x->km.state     != XFRM_STATE_ACQ ||
1512		    x->id.spi       != 0 ||
1513		    x->id.proto	    != proto ||
1514		    (mark & x->mark.m) != x->mark.v ||
1515		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1516		    !xfrm_addr_equal(&x->props.saddr, saddr, family))
1517			continue;
1518
1519		xfrm_state_hold(x);
1520		return x;
1521	}
1522
1523	if (!create)
1524		return NULL;
1525
1526	x = xfrm_state_alloc(net);
1527	if (likely(x)) {
1528		switch (family) {
1529		case AF_INET:
1530			x->sel.daddr.a4 = daddr->a4;
1531			x->sel.saddr.a4 = saddr->a4;
1532			x->sel.prefixlen_d = 32;
1533			x->sel.prefixlen_s = 32;
1534			x->props.saddr.a4 = saddr->a4;
1535			x->id.daddr.a4 = daddr->a4;
1536			break;
1537
1538		case AF_INET6:
1539			x->sel.daddr.in6 = daddr->in6;
1540			x->sel.saddr.in6 = saddr->in6;
1541			x->sel.prefixlen_d = 128;
1542			x->sel.prefixlen_s = 128;
1543			x->props.saddr.in6 = saddr->in6;
1544			x->id.daddr.in6 = daddr->in6;
1545			break;
1546		}
1547
1548		x->km.state = XFRM_STATE_ACQ;
1549		x->id.proto = proto;
1550		x->props.family = family;
1551		x->props.mode = mode;
1552		x->props.reqid = reqid;
1553		x->if_id = if_id;
1554		x->mark.v = m->v;
1555		x->mark.m = m->m;
1556		x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1557		xfrm_state_hold(x);
1558		hrtimer_start(&x->mtimer,
1559			      ktime_set(net->xfrm.sysctl_acq_expires, 0),
1560			      HRTIMER_MODE_REL_SOFT);
1561		list_add(&x->km.all, &net->xfrm.state_all);
1562		XFRM_STATE_INSERT(bydst, &x->bydst, net->xfrm.state_bydst + h,
1563				  x->xso.type);
1564		h = xfrm_src_hash(net, daddr, saddr, family);
1565		XFRM_STATE_INSERT(bysrc, &x->bysrc, net->xfrm.state_bysrc + h,
1566				  x->xso.type);
1567
1568		net->xfrm.state_num++;
1569
1570		xfrm_hash_grow_check(net, x->bydst.next != NULL);
1571	}
1572
1573	return x;
1574}
1575
1576static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1577
1578int xfrm_state_add(struct xfrm_state *x)
1579{
1580	struct net *net = xs_net(x);
1581	struct xfrm_state *x1, *to_put;
1582	int family;
1583	int err;
1584	u32 mark = x->mark.v & x->mark.m;
1585	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1586
1587	family = x->props.family;
1588
1589	to_put = NULL;
1590
1591	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1592
1593	x1 = __xfrm_state_locate(x, use_spi, family);
1594	if (x1) {
1595		to_put = x1;
1596		x1 = NULL;
1597		err = -EEXIST;
1598		goto out;
1599	}
1600
1601	if (use_spi && x->km.seq) {
1602		x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1603		if (x1 && ((x1->id.proto != x->id.proto) ||
1604		    !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1605			to_put = x1;
1606			x1 = NULL;
1607		}
1608	}
1609
1610	if (use_spi && !x1)
1611		x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1612				     x->props.reqid, x->if_id, x->id.proto,
1613				     &x->id.daddr, &x->props.saddr, 0);
1614
1615	__xfrm_state_bump_genids(x);
1616	__xfrm_state_insert(x);
1617	err = 0;
1618
1619out:
1620	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1621
1622	if (x1) {
1623		xfrm_state_delete(x1);
1624		xfrm_state_put(x1);
1625	}
1626
1627	if (to_put)
1628		xfrm_state_put(to_put);
1629
1630	return err;
1631}
1632EXPORT_SYMBOL(xfrm_state_add);
1633
1634#ifdef CONFIG_XFRM_MIGRATE
1635static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security)
1636{
1637	struct xfrm_user_sec_ctx *uctx;
1638	int size = sizeof(*uctx) + security->ctx_len;
1639	int err;
1640
1641	uctx = kmalloc(size, GFP_KERNEL);
1642	if (!uctx)
1643		return -ENOMEM;
1644
1645	uctx->exttype = XFRMA_SEC_CTX;
1646	uctx->len = size;
1647	uctx->ctx_doi = security->ctx_doi;
1648	uctx->ctx_alg = security->ctx_alg;
1649	uctx->ctx_len = security->ctx_len;
1650	memcpy(uctx + 1, security->ctx_str, security->ctx_len);
1651	err = security_xfrm_state_alloc(x, uctx);
1652	kfree(uctx);
1653	if (err)
1654		return err;
1655
1656	return 0;
1657}
1658
1659static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1660					   struct xfrm_encap_tmpl *encap)
1661{
1662	struct net *net = xs_net(orig);
1663	struct xfrm_state *x = xfrm_state_alloc(net);
1664	if (!x)
1665		goto out;
1666
1667	memcpy(&x->id, &orig->id, sizeof(x->id));
1668	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1669	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1670	x->props.mode = orig->props.mode;
1671	x->props.replay_window = orig->props.replay_window;
1672	x->props.reqid = orig->props.reqid;
1673	x->props.family = orig->props.family;
1674	x->props.saddr = orig->props.saddr;
1675
1676	if (orig->aalg) {
1677		x->aalg = xfrm_algo_auth_clone(orig->aalg);
1678		if (!x->aalg)
1679			goto error;
1680	}
1681	x->props.aalgo = orig->props.aalgo;
1682
1683	if (orig->aead) {
1684		x->aead = xfrm_algo_aead_clone(orig->aead);
1685		x->geniv = orig->geniv;
1686		if (!x->aead)
1687			goto error;
1688	}
1689	if (orig->ealg) {
1690		x->ealg = xfrm_algo_clone(orig->ealg);
1691		if (!x->ealg)
1692			goto error;
1693	}
1694	x->props.ealgo = orig->props.ealgo;
1695
1696	if (orig->calg) {
1697		x->calg = xfrm_algo_clone(orig->calg);
1698		if (!x->calg)
1699			goto error;
1700	}
1701	x->props.calgo = orig->props.calgo;
1702
1703	if (encap || orig->encap) {
1704		if (encap)
1705			x->encap = kmemdup(encap, sizeof(*x->encap),
1706					GFP_KERNEL);
1707		else
1708			x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1709					GFP_KERNEL);
1710
1711		if (!x->encap)
1712			goto error;
1713	}
1714
1715	if (orig->security)
1716		if (clone_security(x, orig->security))
1717			goto error;
1718
1719	if (orig->coaddr) {
1720		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1721				    GFP_KERNEL);
1722		if (!x->coaddr)
1723			goto error;
1724	}
1725
1726	if (orig->replay_esn) {
1727		if (xfrm_replay_clone(x, orig))
1728			goto error;
1729	}
1730
1731	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1732	memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
1733
1734	x->props.flags = orig->props.flags;
1735	x->props.extra_flags = orig->props.extra_flags;
1736
1737	x->if_id = orig->if_id;
1738	x->tfcpad = orig->tfcpad;
1739	x->replay_maxdiff = orig->replay_maxdiff;
1740	x->replay_maxage = orig->replay_maxage;
1741	memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
1742	x->km.state = orig->km.state;
1743	x->km.seq = orig->km.seq;
1744	x->replay = orig->replay;
1745	x->preplay = orig->preplay;
1746	x->mapping_maxage = orig->mapping_maxage;
1747	x->lastused = orig->lastused;
1748	x->new_mapping = 0;
1749	x->new_mapping_sport = 0;
1750
1751	return x;
1752
1753 error:
1754	xfrm_state_put(x);
1755out:
1756	return NULL;
1757}
1758
1759struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
1760						u32 if_id)
1761{
1762	unsigned int h;
1763	struct xfrm_state *x = NULL;
1764
1765	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1766
1767	if (m->reqid) {
1768		h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1769				  m->reqid, m->old_family);
1770		hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1771			if (x->props.mode != m->mode ||
1772			    x->id.proto != m->proto)
1773				continue;
1774			if (m->reqid && x->props.reqid != m->reqid)
1775				continue;
1776			if (if_id != 0 && x->if_id != if_id)
1777				continue;
1778			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1779					     m->old_family) ||
1780			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1781					     m->old_family))
1782				continue;
1783			xfrm_state_hold(x);
1784			break;
1785		}
1786	} else {
1787		h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1788				  m->old_family);
1789		hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1790			if (x->props.mode != m->mode ||
1791			    x->id.proto != m->proto)
1792				continue;
1793			if (if_id != 0 && x->if_id != if_id)
1794				continue;
1795			if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1796					     m->old_family) ||
1797			    !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1798					     m->old_family))
1799				continue;
1800			xfrm_state_hold(x);
1801			break;
1802		}
1803	}
1804
1805	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1806
1807	return x;
1808}
1809EXPORT_SYMBOL(xfrm_migrate_state_find);
1810
1811struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1812				      struct xfrm_migrate *m,
1813				      struct xfrm_encap_tmpl *encap)
1814{
1815	struct xfrm_state *xc;
1816
1817	xc = xfrm_state_clone(x, encap);
1818	if (!xc)
1819		return NULL;
1820
1821	xc->props.family = m->new_family;
1822
1823	if (xfrm_init_state(xc) < 0)
1824		goto error;
1825
1826	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1827	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1828
1829	/* add state */
1830	if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1831		/* a care is needed when the destination address of the
1832		   state is to be updated as it is a part of triplet */
1833		xfrm_state_insert(xc);
1834	} else {
1835		if (xfrm_state_add(xc) < 0)
1836			goto error;
1837	}
1838
1839	return xc;
1840error:
1841	xfrm_state_put(xc);
1842	return NULL;
1843}
1844EXPORT_SYMBOL(xfrm_state_migrate);
1845#endif
1846
/*
 * xfrm_state_update - update the installed state that has x's identity
 *
 * The installed state x1 is located by SPI when x's protocol matches
 * IPSEC_PROTO_ANY, by address pair otherwise.
 *
 *  - no such state:                      returns -ESRCH
 *  - xfrm_state_kern(x1) is true:        returns -EEXIST
 *  - x1 is XFRM_STATE_ACQ:               x is inserted, x1 is deleted,
 *                                        returns 0
 *  - x1 is XFRM_STATE_VALID:             encap, coaddr, selector (address
 *                                        based lookups only), lifetimes and
 *                                        optionally smark/if_id are copied
 *                                        from x into x1; x is marked DEAD and
 *                                        one reference on it is dropped;
 *                                        returns 0
 *  - x1 in any other state, or the encap
 *    templates are incompatible:         returns -EINVAL
 */
1847int xfrm_state_update(struct xfrm_state *x)
1848{
1849	struct xfrm_state *x1, *to_put;
1850	int err;
1851	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1852	struct net *net = xs_net(x);
1853
1854	to_put = NULL;
1855
1856	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1857	x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1858
1859	err = -ESRCH;
1860	if (!x1)
1861		goto out;
1862
1863	if (xfrm_state_kern(x1)) {
1864		to_put = x1;
1865		err = -EEXIST;
1866		goto out;
1867	}
1868
	/* x1 is only an acquire placeholder: install x in its place.  x is
	 * set to NULL to tell the code after "out" to delete x1.
	 */
1869	if (x1->km.state == XFRM_STATE_ACQ) {
1870		__xfrm_state_insert(x);
1871		x = NULL;
1872	}
1873	err = 0;
1874
1875out:
1876	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1877
1878	if (to_put)
1879		xfrm_state_put(to_put);
1880
1881	if (err)
1882		return err;
1883
	/* x was inserted above: retire the placeholder and drop the lookup
	 * reference.
	 */
1884	if (!x) {
1885		xfrm_state_delete(x1);
1886		xfrm_state_put(x1);
1887		return 0;
1888	}
1889
	/* From here on x1 is updated in place under its own lock. */
1890	err = -EINVAL;
1891	spin_lock_bh(&x1->lock);
1892	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		/* Encap templates are copied only when both exist with the
		 * same encap_type; exactly one side having one is an error.
		 */
1893		if (x->encap && x1->encap &&
1894		    x->encap->encap_type == x1->encap->encap_type)
1895			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1896		else if (x->encap || x1->encap)
1897			goto fail;
1898
1899		if (x->coaddr && x1->coaddr) {
1900			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1901		}
1902		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1903			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1904		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1905		x1->km.dying = 0;
1906
1907		hrtimer_start(&x1->mtimer, ktime_set(1, 0),
1908			      HRTIMER_MODE_REL_SOFT);
		/* Re-evaluate expiry against the new limits if x1 was used. */
1909		if (READ_ONCE(x1->curlft.use_time))
1910			xfrm_state_check_expire(x1);
1911
		/* smark/if_id changes are made under the state lock, which is
		 * taken here while x1->lock is still held, and are followed
		 * by a genid bump of the states sharing x1's key.
		 */
1912		if (x->props.smark.m || x->props.smark.v || x->if_id) {
1913			spin_lock_bh(&net->xfrm.xfrm_state_lock);
1914
1915			if (x->props.smark.m || x->props.smark.v)
1916				x1->props.smark = x->props.smark;
1917
1918			if (x->if_id)
1919				x1->if_id = x->if_id;
1920
1921			__xfrm_state_bump_genids(x1);
1922			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1923		}
1924
1925		err = 0;
		/* x was never installed; mark it DEAD and drop a reference. */
1926		x->km.state = XFRM_STATE_DEAD;
1927		__xfrm_state_put(x);
1928	}
1929
1930fail:
1931	spin_unlock_bh(&x1->lock);
1932
	/* Drop the reference taken by __xfrm_state_locate(). */
1933	xfrm_state_put(x1);
1934
1935	return err;
1936}
1937EXPORT_SYMBOL(xfrm_state_update);
1938
1939int xfrm_state_check_expire(struct xfrm_state *x)
1940{
1941	xfrm_dev_state_update_curlft(x);
1942
1943	if (!READ_ONCE(x->curlft.use_time))
1944		WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
1945
1946	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1947	    x->curlft.packets >= x->lft.hard_packet_limit) {
1948		x->km.state = XFRM_STATE_EXPIRED;
1949		hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT);
1950		return -EINVAL;
1951	}
1952
1953	if (!x->km.dying &&
1954	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
1955	     x->curlft.packets >= x->lft.soft_packet_limit)) {
1956		x->km.dying = 1;
1957		km_state_expired(x, 0, 0);
1958	}
1959	return 0;
1960}
1961EXPORT_SYMBOL(xfrm_state_check_expire);
1962
1963struct xfrm_state *
1964xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1965		  u8 proto, unsigned short family)
1966{
1967	struct xfrm_state *x;
1968
1969	rcu_read_lock();
1970	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1971	rcu_read_unlock();
1972	return x;
1973}
1974EXPORT_SYMBOL(xfrm_state_lookup);
1975
1976struct xfrm_state *
1977xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1978			 const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1979			 u8 proto, unsigned short family)
1980{
1981	struct xfrm_state *x;
1982
1983	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1984	x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1985	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1986	return x;
1987}
1988EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1989
1990struct xfrm_state *
1991xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1992	      u32 if_id, u8 proto, const xfrm_address_t *daddr,
1993	      const xfrm_address_t *saddr, int create, unsigned short family)
1994{
1995	struct xfrm_state *x;
1996
1997	spin_lock_bh(&net->xfrm.xfrm_state_lock);
1998	x = __find_acq_core(net, mark, family, mode, reqid, if_id, proto, daddr, saddr, create);
1999	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
2000
2001	return x;
2002}
2003EXPORT_SYMBOL(xfrm_find_acq);
2004
2005#ifdef CONFIG_XFRM_SUB_POLICY
2006#if IS_ENABLED(CONFIG_IPV6)
2007/* distribution counting sort function for xfrm_state and xfrm_tmpl */
2008static void
2009__xfrm6_sort(void **dst, void **src, int n,
2010	     int (*cmp)(const void *p), int maxclass)
2011{
2012	int count[XFRM_MAX_DEPTH] = { };
2013	int class[XFRM_MAX_DEPTH];
2014	int i;
2015
2016	for (i = 0; i < n; i++) {
2017		int c = cmp(src[i]);
2018
2019		class[i] = c;
2020		count[c]++;
2021	}
2022
2023	for (i = 2; i < maxclass; i++)
2024		count[i] += count[i - 1];
2025
2026	for (i = 0; i < n; i++) {
2027		dst[count[class[i] - 1]++] = src[i];
2028		src[i] = NULL;
2029	}
2030}
2031
2032/* Rule for xfrm_state:
2033 *
2034 * rule 1: select IPsec transport except AH
2035 * rule 2: select MIPv6 RO or inbound trigger
2036 * rule 3: select IPsec transport AH
2037 * rule 4: select IPsec tunnel
2038 * rule 5: others
2039 */
2040static int __xfrm6_state_sort_cmp(const void *p)
2041{
2042	const struct xfrm_state *v = p;
2043
2044	switch (v->props.mode) {
2045	case XFRM_MODE_TRANSPORT:
2046		if (v->id.proto != IPPROTO_AH)
2047			return 1;
2048		else
2049			return 3;
2050#if IS_ENABLED(CONFIG_IPV6_MIP6)
2051	case XFRM_MODE_ROUTEOPTIMIZATION:
2052	case XFRM_MODE_IN_TRIGGER:
2053		return 2;
2054#endif
2055	case XFRM_MODE_TUNNEL:
2056	case XFRM_MODE_BEET:
2057		return 4;
2058	}
2059	return 5;
2060}
2061
2062/* Rule for xfrm_tmpl:
2063 *
2064 * rule 1: select IPsec transport
2065 * rule 2: select MIPv6 RO or inbound trigger
2066 * rule 3: select IPsec tunnel
2067 * rule 4: others
2068 */
2069static int __xfrm6_tmpl_sort_cmp(const void *p)
2070{
2071	const struct xfrm_tmpl *v = p;
2072
2073	switch (v->mode) {
2074	case XFRM_MODE_TRANSPORT:
2075		return 1;
2076#if IS_ENABLED(CONFIG_IPV6_MIP6)
2077	case XFRM_MODE_ROUTEOPTIMIZATION:
2078	case XFRM_MODE_IN_TRIGGER:
2079		return 2;
2080#endif
2081	case XFRM_MODE_TUNNEL:
2082	case XFRM_MODE_BEET:
2083		return 3;
2084	}
2085	return 4;
2086}
2087#else
/* Without IPv6 every state falls into the catch-all class 5. */
static inline int __xfrm6_state_sort_cmp(const void *p)
{
	return 5;
}
/* Without IPv6 every template falls into the catch-all class 4. */
static inline int __xfrm6_tmpl_sort_cmp(const void *p)
{
	return 4;
}
2090
/*
 * Non-IPv6 stand-in for the class sort: copies the first @n pointers from
 * @src to @dst in their original order.  @cmp and @maxclass are unused and
 * @src is left unmodified.
 */
static inline void
__xfrm6_sort(void **dst, void **src, int n,
	     int (*cmp)(const void *p), int maxclass)
{
	int left;

	for (left = n; left > 0; left--)
		*dst++ = *src++;
}
2100#endif /* CONFIG_IPV6 */
2101
2102void
2103xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
2104	       unsigned short family)
2105{
2106	int i;
2107
2108	if (family == AF_INET6)
2109		__xfrm6_sort((void **)dst, (void **)src, n,
2110			     __xfrm6_tmpl_sort_cmp, 5);
2111	else
2112		for (i = 0; i < n; i++)
2113			dst[i] = src[i];
2114}
2115
2116void
2117xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
2118		unsigned short family)
2119{
2120	int i;
2121
2122	if (family == AF_INET6)
2123		__xfrm6_sort((void **)dst, (void **)src, n,
2124			     __xfrm6_state_sort_cmp, 6);
2125	else
2126		for (i = 0; i < n; i++)
2127			dst[i] = src[i];
2128}
2129#endif
2130
2131/* Silly enough, but I'm lazy to build resolution list */
2132
2133static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
2134{
2135	unsigned int h = xfrm_seq_hash(net, seq);
2136	struct xfrm_state *x;
2137
2138	hlist_for_each_entry_rcu(x, net->xfrm.state_byseq + h, byseq) {
2139		if (x->km.seq == seq &&
2140		    (mark & x->mark.m) == x->mark.v &&
2141		    x->km.state == XFRM_STATE_ACQ) {
2142			xfrm_state_hold(x);
2143			return x;
2144		}
2145	}
2146
2147	return NULL;
2148}
2149
2150struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
2151{
2152	struct xfrm_state *x;
2153
2154	spin_lock_bh(&net->xfrm.xfrm_state_lock);
2155	x = __xfrm_find_acq_byseq(net, mark, seq);
2156	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
2157	return x;
2158}
2159EXPORT_SYMBOL(xfrm_find_acq_byseq);
2160
2161u32 xfrm_get_acqseq(void)
2162{
2163	u32 res;
2164	static atomic_t acqseq;
2165
2166	do {
2167		res = atomic_inc_return(&acqseq);
2168	} while (!res);
2169
2170	return res;
2171}
2172EXPORT_SYMBOL(xfrm_get_acqseq);
2173
2174int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack)
2175{
2176	switch (proto) {
2177	case IPPROTO_AH:
2178	case IPPROTO_ESP:
2179		break;
2180
2181	case IPPROTO_COMP:
2182		/* IPCOMP spi is 16-bits. */
2183		if (max >= 0x10000) {
2184			NL_SET_ERR_MSG(extack, "IPCOMP SPI must be <= 65535");
2185			return -EINVAL;
2186		}
2187		break;
2188
2189	default:
2190		NL_SET_ERR_MSG(extack, "Invalid protocol, must be one of AH, ESP, IPCOMP");
2191		return -EINVAL;
2192	}
2193
2194	if (min > max) {
2195		NL_SET_ERR_MSG(extack, "Invalid SPI range: min > max");
2196		return -EINVAL;
2197	}
2198
2199	return 0;
2200}
2201EXPORT_SYMBOL(verify_spi_info);
2202
2203int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high,
2204		   struct netlink_ext_ack *extack)
2205{
2206	struct net *net = xs_net(x);
2207	unsigned int h;
2208	struct xfrm_state *x0;
2209	int err = -ENOENT;
2210	__be32 minspi = htonl(low);
2211	__be32 maxspi = htonl(high);
2212	__be32 newspi = 0;
2213	u32 mark = x->mark.v & x->mark.m;
2214
2215	spin_lock_bh(&x->lock);
2216	if (x->km.state == XFRM_STATE_DEAD) {
2217		NL_SET_ERR_MSG(extack, "Target ACQUIRE is in DEAD state");
2218		goto unlock;
2219	}
2220
2221	err = 0;
2222	if (x->id.spi)
2223		goto unlock;
2224
2225	err = -ENOENT;
2226
2227	if (minspi == maxspi) {
2228		x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
2229		if (x0) {
2230			NL_SET_ERR_MSG(extack, "Requested SPI is already in use");
2231			xfrm_state_put(x0);
2232			goto unlock;
2233		}
2234		newspi = minspi;
2235	} else {
2236		u32 spi = 0;
2237		for (h = 0; h < high-low+1; h++) {
2238			spi = get_random_u32_inclusive(low, high);
2239			x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
2240			if (x0 == NULL) {
2241				newspi = htonl(spi);
2242				break;
2243			}
2244			xfrm_state_put(x0);
2245		}
2246	}
2247	if (newspi) {
2248		spin_lock_bh(&net->xfrm.xfrm_state_lock);
2249		x->id.spi = newspi;
2250		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
2251		XFRM_STATE_INSERT(byspi, &x->byspi, net->xfrm.state_byspi + h,
2252				  x->xso.type);
2253		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
2254
2255		err = 0;
2256	} else {
2257		NL_SET_ERR_MSG(extack, "No SPI available in the requested range");
2258	}
2259
2260unlock:
2261	spin_unlock_bh(&x->lock);
2262
2263	return err;
2264}
2265EXPORT_SYMBOL(xfrm_alloc_spi);
2266
2267static bool __xfrm_state_filter_match(struct xfrm_state *x,
2268				      struct xfrm_address_filter *filter)
2269{
2270	if (filter) {
2271		if ((filter->family == AF_INET ||
2272		     filter->family == AF_INET6) &&
2273		    x->props.family != filter->family)
2274			return false;
2275
2276		return addr_match(&x->props.saddr, &filter->saddr,
2277				  filter->splen) &&
2278		       addr_match(&x->id.daddr, &filter->daddr,
2279				  filter->dplen);
2280	}
2281	return true;
2282}
2283
2284int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
2285		    int (*func)(struct xfrm_state *, int, void*),
2286		    void *data)
2287{
2288	struct xfrm_state *state;
2289	struct xfrm_state_walk *x;
2290	int err = 0;
2291
2292	if (walk->seq != 0 && list_empty(&walk->all))
2293		return 0;
2294
2295	spin_lock_bh(&net->xfrm.xfrm_state_lock);
2296	if (list_empty(&walk->all))
2297		x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
2298	else
2299		x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
2300	list_for_each_entry_from(x, &net->xfrm.state_all, all) {
2301		if (x->state == XFRM_STATE_DEAD)
2302			continue;
2303		state = container_of(x, struct xfrm_state, km);
2304		if (!xfrm_id_proto_match(state->id.proto, walk->proto))
2305			continue;
2306		if (!__xfrm_state_filter_match(state, walk->filter))
2307			continue;
2308		err = func(state, walk->seq, data);
2309		if (err) {
2310			list_move_tail(&walk->all, &x->all);
2311			goto out;
2312		}
2313		walk->seq++;
2314	}
2315	if (walk->seq == 0) {
2316		err = -ENOENT;
2317		goto out;
2318	}
2319	list_del_init(&walk->all);
2320out:
2321	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
2322	return err;
2323}
2324EXPORT_SYMBOL(xfrm_state_walk);
2325
2326void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
2327			  struct xfrm_address_filter *filter)
2328{
2329	INIT_LIST_HEAD(&walk->all);
2330	walk->proto = proto;
2331	walk->state = XFRM_STATE_DEAD;
2332	walk->seq = 0;
2333	walk->filter = filter;
2334}
2335EXPORT_SYMBOL(xfrm_state_walk_init);
2336
2337void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
2338{
2339	kfree(walk->filter);
2340
2341	if (list_empty(&walk->all))
2342		return;
2343
2344	spin_lock_bh(&net->xfrm.xfrm_state_lock);
2345	list_del(&walk->all);
2346	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
2347}
2348EXPORT_SYMBOL(xfrm_state_walk_done);
2349
2350static void xfrm_replay_timer_handler(struct timer_list *t)
2351{
2352	struct xfrm_state *x = from_timer(x, t, rtimer);
2353
2354	spin_lock(&x->lock);
2355
2356	if (x->km.state == XFRM_STATE_VALID) {
2357		if (xfrm_aevent_is_on(xs_net(x)))
2358			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
2359		else
2360			x->xflags |= XFRM_TIME_DEFER;
2361	}
2362
2363	spin_unlock(&x->lock);
2364}
2365
2366static LIST_HEAD(xfrm_km_list);
2367
2368void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
2369{
2370	struct xfrm_mgr *km;
2371
2372	rcu_read_lock();
2373	list_for_each_entry_rcu(km, &xfrm_km_list, list)
2374		if (km->notify_policy)
2375			km->notify_policy(xp, dir, c);
2376	rcu_read_unlock();
2377}
2378
2379void km_state_notify(struct xfrm_state *x, const struct km_event *c)
2380{
2381	struct xfrm_mgr *km;
2382	rcu_read_lock();
2383	list_for_each_entry_rcu(km, &xfrm_km_list, list)
2384		if (km->notify)
2385			km->notify(x, c);
2386	rcu_read_unlock();
2387}
2388
2389EXPORT_SYMBOL(km_policy_notify);
2390EXPORT_SYMBOL(km_state_notify);
2391
2392void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
2393{
2394	struct km_event c;
2395
2396	c.data.hard = hard;
2397	c.portid = portid;
2398	c.event = XFRM_MSG_EXPIRE;
2399	km_state_notify(x, &c);
2400}
2401
2402EXPORT_SYMBOL(km_state_expired);
2403/*
2404 * We send to all registered managers regardless of failure
2405 * We are happy with one success
2406*/
2407int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
2408{
2409	int err = -EINVAL, acqret;
2410	struct xfrm_mgr *km;
2411
2412	rcu_read_lock();
2413	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2414		acqret = km->acquire(x, t, pol);
2415		if (!acqret)
2416			err = acqret;
2417	}
2418	rcu_read_unlock();
2419	return err;
2420}
2421EXPORT_SYMBOL(km_query);
2422
2423static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
2424{
2425	int err = -EINVAL;
2426	struct xfrm_mgr *km;
2427
2428	rcu_read_lock();
2429	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2430		if (km->new_mapping)
2431			err = km->new_mapping(x, ipaddr, sport);
2432		if (!err)
2433			break;
2434	}
2435	rcu_read_unlock();
2436	return err;
2437}
2438
2439int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
2440{
2441	int ret = 0;
2442
2443	if (x->mapping_maxage) {
2444		if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage ||
2445		    x->new_mapping_sport != sport) {
2446			x->new_mapping_sport = sport;
2447			x->new_mapping = jiffies / HZ;
2448			ret = __km_new_mapping(x, ipaddr, sport);
2449		}
2450	} else {
2451		ret = __km_new_mapping(x, ipaddr, sport);
2452	}
2453
2454	return ret;
2455}
2456EXPORT_SYMBOL(km_new_mapping);
2457
2458void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
2459{
2460	struct km_event c;
2461
2462	c.data.hard = hard;
2463	c.portid = portid;
2464	c.event = XFRM_MSG_POLEXPIRE;
2465	km_policy_notify(pol, dir, &c);
2466}
2467EXPORT_SYMBOL(km_policy_expired);
2468
2469#ifdef CONFIG_XFRM_MIGRATE
2470int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2471	       const struct xfrm_migrate *m, int num_migrate,
2472	       const struct xfrm_kmaddress *k,
2473	       const struct xfrm_encap_tmpl *encap)
2474{
2475	int err = -EINVAL;
2476	int ret;
2477	struct xfrm_mgr *km;
2478
2479	rcu_read_lock();
2480	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2481		if (km->migrate) {
2482			ret = km->migrate(sel, dir, type, m, num_migrate, k,
2483					  encap);
2484			if (!ret)
2485				err = ret;
2486		}
2487	}
2488	rcu_read_unlock();
2489	return err;
2490}
2491EXPORT_SYMBOL(km_migrate);
2492#endif
2493
2494int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2495{
2496	int err = -EINVAL;
2497	int ret;
2498	struct xfrm_mgr *km;
2499
2500	rcu_read_lock();
2501	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2502		if (km->report) {
2503			ret = km->report(net, proto, sel, addr);
2504			if (!ret)
2505				err = ret;
2506		}
2507	}
2508	rcu_read_unlock();
2509	return err;
2510}
2511EXPORT_SYMBOL(km_report);
2512
2513static bool km_is_alive(const struct km_event *c)
2514{
2515	struct xfrm_mgr *km;
2516	bool is_alive = false;
2517
2518	rcu_read_lock();
2519	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2520		if (km->is_alive && km->is_alive(c)) {
2521			is_alive = true;
2522			break;
2523		}
2524	}
2525	rcu_read_unlock();
2526
2527	return is_alive;
2528}
2529
2530#if IS_ENABLED(CONFIG_XFRM_USER_COMPAT)
2531static DEFINE_SPINLOCK(xfrm_translator_lock);
2532static struct xfrm_translator __rcu *xfrm_translator;
2533
2534struct xfrm_translator *xfrm_get_translator(void)
2535{
2536	struct xfrm_translator *xtr;
2537
2538	rcu_read_lock();
2539	xtr = rcu_dereference(xfrm_translator);
2540	if (unlikely(!xtr))
2541		goto out;
2542	if (!try_module_get(xtr->owner))
2543		xtr = NULL;
2544out:
2545	rcu_read_unlock();
2546	return xtr;
2547}
2548EXPORT_SYMBOL_GPL(xfrm_get_translator);
2549
2550void xfrm_put_translator(struct xfrm_translator *xtr)
2551{
2552	module_put(xtr->owner);
2553}
2554EXPORT_SYMBOL_GPL(xfrm_put_translator);
2555
2556int xfrm_register_translator(struct xfrm_translator *xtr)
2557{
2558	int err = 0;
2559
2560	spin_lock_bh(&xfrm_translator_lock);
2561	if (unlikely(xfrm_translator != NULL))
2562		err = -EEXIST;
2563	else
2564		rcu_assign_pointer(xfrm_translator, xtr);
2565	spin_unlock_bh(&xfrm_translator_lock);
2566
2567	return err;
2568}
2569EXPORT_SYMBOL_GPL(xfrm_register_translator);
2570
2571int xfrm_unregister_translator(struct xfrm_translator *xtr)
2572{
2573	int err = 0;
2574
2575	spin_lock_bh(&xfrm_translator_lock);
2576	if (likely(xfrm_translator != NULL)) {
2577		if (rcu_access_pointer(xfrm_translator) != xtr)
2578			err = -EINVAL;
2579		else
2580			RCU_INIT_POINTER(xfrm_translator, NULL);
2581	}
2582	spin_unlock_bh(&xfrm_translator_lock);
2583	synchronize_rcu();
2584
2585	return err;
2586}
2587EXPORT_SYMBOL_GPL(xfrm_unregister_translator);
2588#endif
2589
2590int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen)
2591{
2592	int err;
2593	u8 *data;
2594	struct xfrm_mgr *km;
2595	struct xfrm_policy *pol = NULL;
2596
2597	if (sockptr_is_null(optval) && !optlen) {
2598		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
2599		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
2600		__sk_dst_reset(sk);
2601		return 0;
2602	}
2603
2604	if (optlen <= 0 || optlen > PAGE_SIZE)
2605		return -EMSGSIZE;
2606
2607	data = memdup_sockptr(optval, optlen);
2608	if (IS_ERR(data))
2609		return PTR_ERR(data);
2610
2611	if (in_compat_syscall()) {
2612		struct xfrm_translator *xtr = xfrm_get_translator();
2613
2614		if (!xtr) {
2615			kfree(data);
2616			return -EOPNOTSUPP;
2617		}
2618
2619		err = xtr->xlate_user_policy_sockptr(&data, optlen);
2620		xfrm_put_translator(xtr);
2621		if (err) {
2622			kfree(data);
2623			return err;
2624		}
2625	}
2626
2627	err = -EINVAL;
2628	rcu_read_lock();
2629	list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2630		pol = km->compile_policy(sk, optname, data,
2631					 optlen, &err);
2632		if (err >= 0)
2633			break;
2634	}
2635	rcu_read_unlock();
2636
2637	if (err >= 0) {
2638		xfrm_sk_policy_insert(sk, err, pol);
2639		xfrm_pol_put(pol);
2640		__sk_dst_reset(sk);
2641		err = 0;
2642	}
2643
2644	kfree(data);
2645	return err;
2646}
2647EXPORT_SYMBOL(xfrm_user_policy);
2648
2649static DEFINE_SPINLOCK(xfrm_km_lock);
2650
2651void xfrm_register_km(struct xfrm_mgr *km)
2652{
2653	spin_lock_bh(&xfrm_km_lock);
2654	list_add_tail_rcu(&km->list, &xfrm_km_list);
2655	spin_unlock_bh(&xfrm_km_lock);
2656}
2657EXPORT_SYMBOL(xfrm_register_km);
2658
2659void xfrm_unregister_km(struct xfrm_mgr *km)
2660{
2661	spin_lock_bh(&xfrm_km_lock);
2662	list_del_rcu(&km->list);
2663	spin_unlock_bh(&xfrm_km_lock);
2664	synchronize_rcu();
2665}
2666EXPORT_SYMBOL(xfrm_unregister_km);
2667
2668int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2669{
2670	int err = 0;
2671
2672	if (WARN_ON(afinfo->family >= NPROTO))
2673		return -EAFNOSUPPORT;
2674
2675	spin_lock_bh(&xfrm_state_afinfo_lock);
2676	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2677		err = -EEXIST;
2678	else
2679		rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2680	spin_unlock_bh(&xfrm_state_afinfo_lock);
2681	return err;
2682}
2683EXPORT_SYMBOL(xfrm_state_register_afinfo);
2684
2685int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2686{
2687	int err = 0, family = afinfo->family;
2688
2689	if (WARN_ON(family >= NPROTO))
2690		return -EAFNOSUPPORT;
2691
2692	spin_lock_bh(&xfrm_state_afinfo_lock);
2693	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2694		if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2695			err = -EINVAL;
2696		else
2697			RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2698	}
2699	spin_unlock_bh(&xfrm_state_afinfo_lock);
2700	synchronize_rcu();
2701	return err;
2702}
2703EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2704
2705struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2706{
2707	if (unlikely(family >= NPROTO))
2708		return NULL;
2709
2710	return rcu_dereference(xfrm_state_afinfo[family]);
2711}
2712EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
2713
2714struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2715{
2716	struct xfrm_state_afinfo *afinfo;
2717	if (unlikely(family >= NPROTO))
2718		return NULL;
2719	rcu_read_lock();
2720	afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2721	if (unlikely(!afinfo))
2722		rcu_read_unlock();
2723	return afinfo;
2724}
2725
2726void xfrm_flush_gc(void)
2727{
2728	flush_work(&xfrm_state_gc_work);
2729}
2730EXPORT_SYMBOL(xfrm_flush_gc);
2731
2732/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2733void xfrm_state_delete_tunnel(struct xfrm_state *x)
2734{
2735	if (x->tunnel) {
2736		struct xfrm_state *t = x->tunnel;
2737
2738		if (atomic_read(&t->tunnel_users) == 2)
2739			xfrm_state_delete(t);
2740		atomic_dec(&t->tunnel_users);
2741		xfrm_state_put_sync(t);
2742		x->tunnel = NULL;
2743	}
2744}
2745EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2746
2747u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
2748{
2749	const struct xfrm_type *type = READ_ONCE(x->type);
2750	struct crypto_aead *aead;
2751	u32 blksize, net_adj = 0;
2752
2753	if (x->km.state != XFRM_STATE_VALID ||
2754	    !type || type->proto != IPPROTO_ESP)
2755		return mtu - x->props.header_len;
2756
2757	aead = x->data;
2758	blksize = ALIGN(crypto_aead_blocksize(aead), 4);
2759
2760	switch (x->props.mode) {
2761	case XFRM_MODE_TRANSPORT:
2762	case XFRM_MODE_BEET:
2763		if (x->props.family == AF_INET)
2764			net_adj = sizeof(struct iphdr);
2765		else if (x->props.family == AF_INET6)
2766			net_adj = sizeof(struct ipv6hdr);
2767		break;
2768	case XFRM_MODE_TUNNEL:
2769		break;
2770	default:
2771		WARN_ON_ONCE(1);
2772		break;
2773	}
2774
2775	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) -
2776		 net_adj) & ~(blksize - 1)) + net_adj - 2;
2777}
2778EXPORT_SYMBOL_GPL(xfrm_state_mtu);
2779
2780int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
2781		      struct netlink_ext_ack *extack)
2782{
2783	const struct xfrm_mode *inner_mode;
2784	const struct xfrm_mode *outer_mode;
2785	int family = x->props.family;
2786	int err;
2787
2788	if (family == AF_INET &&
2789	    READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
2790		x->props.flags |= XFRM_STATE_NOPMTUDISC;
2791
2792	err = -EPROTONOSUPPORT;
2793
2794	if (x->sel.family != AF_UNSPEC) {
2795		inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
2796		if (inner_mode == NULL) {
2797			NL_SET_ERR_MSG(extack, "Requested mode not found");
2798			goto error;
2799		}
2800
2801		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2802		    family != x->sel.family) {
2803			NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family");
2804			goto error;
2805		}
2806
2807		x->inner_mode = *inner_mode;
2808	} else {
2809		const struct xfrm_mode *inner_mode_iaf;
2810		int iafamily = AF_INET;
2811
2812		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
2813		if (inner_mode == NULL) {
2814			NL_SET_ERR_MSG(extack, "Requested mode not found");
2815			goto error;
2816		}
2817
2818		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
2819			NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector");
2820			goto error;
2821		}
2822
2823		x->inner_mode = *inner_mode;
2824
2825		if (x->props.family == AF_INET)
2826			iafamily = AF_INET6;
2827
2828		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
2829		if (inner_mode_iaf) {
2830			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
2831				x->inner_mode_iaf = *inner_mode_iaf;
2832		}
2833	}
2834
2835	x->type = xfrm_get_type(x->id.proto, family);
2836	if (x->type == NULL) {
2837		NL_SET_ERR_MSG(extack, "Requested type not found");
2838		goto error;
2839	}
2840
2841	x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);
2842
2843	err = x->type->init_state(x, extack);
2844	if (err)
2845		goto error;
2846
2847	outer_mode = xfrm_get_mode(x->props.mode, family);
2848	if (!outer_mode) {
2849		NL_SET_ERR_MSG(extack, "Requested mode not found");
2850		err = -EPROTONOSUPPORT;
2851		goto error;
2852	}
2853
2854	x->outer_mode = *outer_mode;
2855	if (init_replay) {
2856		err = xfrm_init_replay(x, extack);
2857		if (err)
2858			goto error;
2859	}
2860
2861error:
2862	return err;
2863}
2864
2865EXPORT_SYMBOL(__xfrm_init_state);
2866
2867int xfrm_init_state(struct xfrm_state *x)
2868{
2869	int err;
2870
2871	err = __xfrm_init_state(x, true, false, NULL);
2872	if (!err)
2873		x->km.state = XFRM_STATE_VALID;
2874
2875	return err;
2876}
2877
2878EXPORT_SYMBOL(xfrm_init_state);
2879
2880int __net_init xfrm_state_init(struct net *net)
2881{
2882	unsigned int sz;
2883
2884	if (net_eq(net, &init_net))
2885		xfrm_state_cache = KMEM_CACHE(xfrm_state,
2886					      SLAB_HWCACHE_ALIGN | SLAB_PANIC);
2887
2888	INIT_LIST_HEAD(&net->xfrm.state_all);
2889
2890	sz = sizeof(struct hlist_head) * 8;
2891
2892	net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2893	if (!net->xfrm.state_bydst)
2894		goto out_bydst;
2895	net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2896	if (!net->xfrm.state_bysrc)
2897		goto out_bysrc;
2898	net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2899	if (!net->xfrm.state_byspi)
2900		goto out_byspi;
2901	net->xfrm.state_byseq = xfrm_hash_alloc(sz);
2902	if (!net->xfrm.state_byseq)
2903		goto out_byseq;
2904	net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2905
2906	net->xfrm.state_num = 0;
2907	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2908	spin_lock_init(&net->xfrm.xfrm_state_lock);
2909	seqcount_spinlock_init(&net->xfrm.xfrm_state_hash_generation,
2910			       &net->xfrm.xfrm_state_lock);
2911	return 0;
2912
2913out_byseq:
2914	xfrm_hash_free(net->xfrm.state_byspi, sz);
2915out_byspi:
2916	xfrm_hash_free(net->xfrm.state_bysrc, sz);
2917out_bysrc:
2918	xfrm_hash_free(net->xfrm.state_bydst, sz);
2919out_bydst:
2920	return -ENOMEM;
2921}
2922
2923void xfrm_state_fini(struct net *net)
2924{
2925	unsigned int sz;
2926
2927	flush_work(&net->xfrm.state_hash_work);
2928	flush_work(&xfrm_state_gc_work);
2929	xfrm_state_flush(net, 0, false, true);
2930
2931	WARN_ON(!list_empty(&net->xfrm.state_all));
2932
2933	sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2934	WARN_ON(!hlist_empty(net->xfrm.state_byseq));
2935	xfrm_hash_free(net->xfrm.state_byseq, sz);
2936	WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2937	xfrm_hash_free(net->xfrm.state_byspi, sz);
2938	WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2939	xfrm_hash_free(net->xfrm.state_bysrc, sz);
2940	WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2941	xfrm_hash_free(net->xfrm.state_bydst, sz);
2942}
2943
2944#ifdef CONFIG_AUDITSYSCALL
2945static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2946				     struct audit_buffer *audit_buf)
2947{
2948	struct xfrm_sec_ctx *ctx = x->security;
2949	u32 spi = ntohl(x->id.spi);
2950
2951	if (ctx)
2952		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2953				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2954
2955	switch (x->props.family) {
2956	case AF_INET:
2957		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2958				 &x->props.saddr.a4, &x->id.daddr.a4);
2959		break;
2960	case AF_INET6:
2961		audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2962				 x->props.saddr.a6, x->id.daddr.a6);
2963		break;
2964	}
2965
2966	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2967}
2968
2969static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2970				      struct audit_buffer *audit_buf)
2971{
2972	const struct iphdr *iph4;
2973	const struct ipv6hdr *iph6;
2974
2975	switch (family) {
2976	case AF_INET:
2977		iph4 = ip_hdr(skb);
2978		audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2979				 &iph4->saddr, &iph4->daddr);
2980		break;
2981	case AF_INET6:
2982		iph6 = ipv6_hdr(skb);
2983		audit_log_format(audit_buf,
2984				 " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2985				 &iph6->saddr, &iph6->daddr,
2986				 iph6->flow_lbl[0] & 0x0f,
2987				 iph6->flow_lbl[1],
2988				 iph6->flow_lbl[2]);
2989		break;
2990	}
2991}
2992
2993void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2994{
2995	struct audit_buffer *audit_buf;
2996
2997	audit_buf = xfrm_audit_start("SAD-add");
2998	if (audit_buf == NULL)
2999		return;
3000	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3001	xfrm_audit_helper_sainfo(x, audit_buf);
3002	audit_log_format(audit_buf, " res=%u", result);
3003	audit_log_end(audit_buf);
3004}
3005EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
3006
3007void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
3008{
3009	struct audit_buffer *audit_buf;
3010
3011	audit_buf = xfrm_audit_start("SAD-delete");
3012	if (audit_buf == NULL)
3013		return;
3014	xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3015	xfrm_audit_helper_sainfo(x, audit_buf);
3016	audit_log_format(audit_buf, " res=%u", result);
3017	audit_log_end(audit_buf);
3018}
3019EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
3020
3021void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
3022				      struct sk_buff *skb)
3023{
3024	struct audit_buffer *audit_buf;
3025	u32 spi;
3026
3027	audit_buf = xfrm_audit_start("SA-replay-overflow");
3028	if (audit_buf == NULL)
3029		return;
3030	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
3031	/* don't record the sequence number because it's inherent in this kind
3032	 * of audit message */
3033	spi = ntohl(x->id.spi);
3034	audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
3035	audit_log_end(audit_buf);
3036}
3037EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
3038
3039void xfrm_audit_state_replay(struct xfrm_state *x,
3040			     struct sk_buff *skb, __be32 net_seq)
3041{
3042	struct audit_buffer *audit_buf;
3043	u32 spi;
3044
3045	audit_buf = xfrm_audit_start("SA-replayed-pkt");
3046	if (audit_buf == NULL)
3047		return;
3048	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
3049	spi = ntohl(x->id.spi);
3050	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
3051			 spi, spi, ntohl(net_seq));
3052	audit_log_end(audit_buf);
3053}
3054EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
3055
3056void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
3057{
3058	struct audit_buffer *audit_buf;
3059
3060	audit_buf = xfrm_audit_start("SA-notfound");
3061	if (audit_buf == NULL)
3062		return;
3063	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
3064	audit_log_end(audit_buf);
3065}
3066EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
3067
3068void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
3069			       __be32 net_spi, __be32 net_seq)
3070{
3071	struct audit_buffer *audit_buf;
3072	u32 spi;
3073
3074	audit_buf = xfrm_audit_start("SA-notfound");
3075	if (audit_buf == NULL)
3076		return;
3077	xfrm_audit_helper_pktinfo(skb, family, audit_buf);
3078	spi = ntohl(net_spi);
3079	audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
3080			 spi, spi, ntohl(net_seq));
3081	audit_log_end(audit_buf);
3082}
3083EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
3084
3085void xfrm_audit_state_icvfail(struct xfrm_state *x,
3086			      struct sk_buff *skb, u8 proto)
3087{
3088	struct audit_buffer *audit_buf;
3089	__be32 net_spi;
3090	__be32 net_seq;
3091
3092	audit_buf = xfrm_audit_start("SA-icv-failure");
3093	if (audit_buf == NULL)
3094		return;
3095	xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
3096	if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
3097		u32 spi = ntohl(net_spi);
3098		audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
3099				 spi, spi, ntohl(net_seq));
3100	}
3101	audit_log_end(audit_buf);
3102}
3103EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
3104#endif /* CONFIG_AUDITSYSCALL */