Linux Audio

Check our new training course

Loading...
   1/*
   2 *	NET3	IP device support routines.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 *	Derived from the IP parts of dev.c 1.0.19
  10 * 		Authors:	Ross Biro
  11 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *	Additional Authors:
  15 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *	Changes:
  19 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
  20 *					lists.
  21 *		Cyrus Durgin:		updated for kmod
  22 *		Matthias Andree:	in devinet_ioctl, compare label and
  23 *					address (4.4BSD alias style support),
  24 *					fall back to comparing just the label
  25 *					if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <asm/system.h>
  31#include <linux/bitops.h>
  32#include <linux/capability.h>
  33#include <linux/module.h>
  34#include <linux/types.h>
  35#include <linux/kernel.h>
  36#include <linux/string.h>
  37#include <linux/mm.h>
  38#include <linux/socket.h>
  39#include <linux/sockios.h>
  40#include <linux/in.h>
  41#include <linux/errno.h>
  42#include <linux/interrupt.h>
  43#include <linux/if_addr.h>
  44#include <linux/if_ether.h>
  45#include <linux/inet.h>
  46#include <linux/netdevice.h>
  47#include <linux/etherdevice.h>
  48#include <linux/skbuff.h>
  49#include <linux/init.h>
  50#include <linux/notifier.h>
  51#include <linux/inetdevice.h>
  52#include <linux/igmp.h>
  53#include <linux/slab.h>
  54#include <linux/hash.h>
  55#ifdef CONFIG_SYSCTL
  56#include <linux/sysctl.h>
  57#endif
  58#include <linux/kmod.h>
  59
  60#include <net/arp.h>
  61#include <net/ip.h>
  62#include <net/route.h>
  63#include <net/ip_fib.h>
  64#include <net/rtnetlink.h>
  65#include <net/net_namespace.h>
  66
  67#include "fib_lookup.h"
  68
/*
 * Statically initialised IPv4 device configuration block.
 *
 * Slots in .data are addressed by the IPV4_DEVCONF_* constant minus one.
 * The three redirect knobs and SHARED_MEDIA start out as 1; every slot not
 * named here is zero-initialised.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
	},
};
  77
/*
 * Statically initialised "default" IPv4 device configuration block.
 *
 * Same layout as ipv4_devconf (index = IPV4_DEVCONF_* - 1) and the same
 * initial values, plus ACCEPT_SOURCE_ROUTE set to 1.
 *
 * NOTE(review): presumably this is what net->ipv4.devconf_dflt refers to
 * for the initial namespace; the assignment is not in this part of the
 * file - confirm.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
	},
};
  87
  88#define IPV4_DEVCONF_DFLT(net, attr) \
  89	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  90
/*
 * Netlink attribute policy for IPv4 address messages, handed to
 * nlmsg_parse() by the RTM address handlers in this file.
 *
 * The three address attributes are 32-bit values; the label is a string
 * whose policy length is IFNAMSIZ - 1.
 */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
  97
/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
 * value.  So if you change this define, make appropriate changes to
 * inet_addr_hash as well.
 *
 * inet_addr_hash() masks its result with (IN4_ADDR_HSIZE - 1), so the
 * value must stay a power of two.
 */
#define IN4_ADDR_HSIZE	256
/* Hash table of in_ifaddr entries, linked through in_ifaddr->hash. */
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
/* Taken around every insertion into / removal from inet_addr_lst. */
static DEFINE_SPINLOCK(inet_addr_hash_lock);
 105
 106static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
 107{
 108	u32 val = (__force u32) addr ^ hash_ptr(net, 8);
 109
 110	return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
 111		(IN4_ADDR_HSIZE - 1));
 112}
 113
 114static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 115{
 116	unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
 117
 118	spin_lock(&inet_addr_hash_lock);
 119	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 120	spin_unlock(&inet_addr_hash_lock);
 121}
 122
 123static void inet_hash_remove(struct in_ifaddr *ifa)
 124{
 125	spin_lock(&inet_addr_hash_lock);
 126	hlist_del_init_rcu(&ifa->hash);
 127	spin_unlock(&inet_addr_hash_lock);
 128}
 129
 130/**
 131 * __ip_dev_find - find the first device with a given source address.
 132 * @net: the net namespace
 133 * @addr: the source address
 134 * @devref: if true, take a reference on the found device
 135 *
 136 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 137 */
 138struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 139{
 140	unsigned int hash = inet_addr_hash(net, addr);
 141	struct net_device *result = NULL;
 142	struct in_ifaddr *ifa;
 143	struct hlist_node *node;
 144
 145	rcu_read_lock();
 146	hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
 147		struct net_device *dev = ifa->ifa_dev->dev;
 148
 149		if (!net_eq(dev_net(dev), net))
 150			continue;
 151		if (ifa->ifa_local == addr) {
 152			result = dev;
 153			break;
 154		}
 155	}
 156	if (!result) {
 157		struct flowi4 fl4 = { .daddr = addr };
 158		struct fib_result res = { 0 };
 159		struct fib_table *local;
 160
 161		/* Fallback to FIB local table so that communication
 162		 * over loopback subnets work.
 163		 */
 164		local = fib_get_table(net, RT_TABLE_LOCAL);
 165		if (local &&
 166		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 167		    res.type == RTN_LOCAL)
 168			result = FIB_RES_DEV(res);
 169	}
 170	if (result && devref)
 171		dev_hold(result);
 172	rcu_read_unlock();
 173	return result;
 174}
 175EXPORT_SYMBOL(__ip_dev_find);
 176
/* Forward declaration; the definition is not in this part of the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/*
 * Notifier chain called with NETDEV_UP / NETDEV_DOWN and the affected
 * in_ifaddr whenever an address is inserted or deleted below.
 */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
/* Forward declaration: inetdev_destroy() uses it before its definition. */
static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations are provided elsewhere when sysctl is enabled. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL the per-device sysctl hooks are empty stubs. */
static inline void devinet_sysctl_register(struct in_device *idev)
{
}
static inline void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
 193
 194/* Locks all the inet devices. */
 195
 196static struct in_ifaddr *inet_alloc_ifa(void)
 197{
 198	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 199}
 200
 201static void inet_rcu_free_ifa(struct rcu_head *head)
 202{
 203	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 204	if (ifa->ifa_dev)
 205		in_dev_put(ifa->ifa_dev);
 206	kfree(ifa);
 207}
 208
 209static inline void inet_free_ifa(struct in_ifaddr *ifa)
 210{
 211	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 212}
 213
 214void in_dev_finish_destroy(struct in_device *idev)
 215{
 216	struct net_device *dev = idev->dev;
 217
 218	WARN_ON(idev->ifa_list);
 219	WARN_ON(idev->mc_list);
 220#ifdef NET_REFCNT_DEBUG
 221	printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
 222	       idev, dev ? dev->name : "NIL");
 223#endif
 224	dev_put(dev);
 225	if (!idev->dead)
 226		pr_err("Freeing alive in_device %p\n", idev);
 227	else
 228		kfree(idev);
 229}
 230EXPORT_SYMBOL(in_dev_finish_destroy);
 231
 232static struct in_device *inetdev_init(struct net_device *dev)
 233{
 234	struct in_device *in_dev;
 235
 236	ASSERT_RTNL();
 237
 238	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 239	if (!in_dev)
 240		goto out;
 241	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 242			sizeof(in_dev->cnf));
 243	in_dev->cnf.sysctl = NULL;
 244	in_dev->dev = dev;
 245	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 246	if (!in_dev->arp_parms)
 247		goto out_kfree;
 248	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 249		dev_disable_lro(dev);
 250	/* Reference in_dev->dev */
 251	dev_hold(dev);
 252	/* Account for reference dev->ip_ptr (below) */
 253	in_dev_hold(in_dev);
 254
 255	devinet_sysctl_register(in_dev);
 256	ip_mc_init_dev(in_dev);
 257	if (dev->flags & IFF_UP)
 258		ip_mc_up(in_dev);
 259
 260	/* we can receive as soon as ip_ptr is set -- do this last */
 261	rcu_assign_pointer(dev->ip_ptr, in_dev);
 262out:
 263	return in_dev;
 264out_kfree:
 265	kfree(in_dev);
 266	in_dev = NULL;
 267	goto out;
 268}
 269
 270static void in_dev_rcu_put(struct rcu_head *head)
 271{
 272	struct in_device *idev = container_of(head, struct in_device, rcu_head);
 273	in_dev_put(idev);
 274}
 275
 276static void inetdev_destroy(struct in_device *in_dev)
 277{
 278	struct in_ifaddr *ifa;
 279	struct net_device *dev;
 280
 281	ASSERT_RTNL();
 282
 283	dev = in_dev->dev;
 284
 285	in_dev->dead = 1;
 286
 287	ip_mc_destroy_dev(in_dev);
 288
 289	while ((ifa = in_dev->ifa_list) != NULL) {
 290		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 291		inet_free_ifa(ifa);
 292	}
 293
 294	rcu_assign_pointer(dev->ip_ptr, NULL);
 295
 296	devinet_sysctl_unregister(in_dev);
 297	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 298	arp_ifdown(dev);
 299
 300	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 301}
 302
 303int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 304{
 305	rcu_read_lock();
 306	for_primary_ifa(in_dev) {
 307		if (inet_ifa_match(a, ifa)) {
 308			if (!b || inet_ifa_match(b, ifa)) {
 309				rcu_read_unlock();
 310				return 1;
 311			}
 312		}
 313	} endfor_ifa(in_dev);
 314	rcu_read_unlock();
 315	return 0;
 316}
 317
/*
 * __inet_del_ifa - unlink one address from a device's address list.
 * @in_dev:  in_device that owns the list
 * @ifap:    the link (list head or some ->ifa_next slot) that currently
 *           points at the address to remove
 * @destroy: non-zero to hand the removed address to inet_free_ifa()
 * @nlh:     passed through to rtmsg_ifa() for the netlink notifications
 * @pid:     passed through to rtmsg_ifa() for the netlink notifications
 *
 * Caller must hold RTNL.  When the removed address is a primary one, the
 * secondaries that share its mask and match its address are either
 * deleted as well or, if IN_DEV_PROMOTE_SECONDARIES() is set, the first
 * of them is promoted to primary and the rest get their routes re-added.
 */
static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
			 int destroy, struct nlmsghdr *nlh, u32 pid)
{
	/* First matching secondary, when promotion is enabled. */
	struct in_ifaddr *promote = NULL;
	/* ifa1 is the address being deleted. */
	struct in_ifaddr *ifa, *ifa1 = *ifap;
	/* Entry after which a promoted address is re-linked. */
	struct in_ifaddr *last_prim = in_dev->ifa_list;
	/* Entry preceding 'promote', or NULL if nothing was skipped first. */
	struct in_ifaddr *prev_prom = NULL;
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	/* 1. Deleting primary ifaddr forces deletion all secondaries
	 * unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr **ifap1 = &ifa1->ifa_next;

		while ((ifa = *ifap1) != NULL) {
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Skip primaries and secondaries of other subnets. */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free this secondary. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = ifa->ifa_next) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		/* Remember the tail before 'promote' is moved in the list. */
		struct in_ifaddr *next_sec = promote->ifa_next;

		/* Move 'promote' to just after the last suitable primary. */
		if (prev_prom) {
			prev_prom->ifa_next = promote->ifa_next;
			promote->ifa_next = last_prim->ifa_next;
			last_prim->ifa_next = promote;
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add routes for the remaining secondaries of the subnet. */
		for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
 417
 418static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 419			 int destroy)
 420{
 421	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 422}
 423
 424static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 425			     u32 pid)
 426{
 427	struct in_device *in_dev = ifa->ifa_dev;
 428	struct in_ifaddr *ifa1, **ifap, **last_primary;
 429
 430	ASSERT_RTNL();
 431
 432	if (!ifa->ifa_local) {
 433		inet_free_ifa(ifa);
 434		return 0;
 435	}
 436
 437	ifa->ifa_flags &= ~IFA_F_SECONDARY;
 438	last_primary = &in_dev->ifa_list;
 439
 440	for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 441	     ifap = &ifa1->ifa_next) {
 442		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 443		    ifa->ifa_scope <= ifa1->ifa_scope)
 444			last_primary = &ifa1->ifa_next;
 445		if (ifa1->ifa_mask == ifa->ifa_mask &&
 446		    inet_ifa_match(ifa1->ifa_address, ifa)) {
 447			if (ifa1->ifa_local == ifa->ifa_local) {
 448				inet_free_ifa(ifa);
 449				return -EEXIST;
 450			}
 451			if (ifa1->ifa_scope != ifa->ifa_scope) {
 452				inet_free_ifa(ifa);
 453				return -EINVAL;
 454			}
 455			ifa->ifa_flags |= IFA_F_SECONDARY;
 456		}
 457	}
 458
 459	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 460		net_srandom(ifa->ifa_local);
 461		ifap = last_primary;
 462	}
 463
 464	ifa->ifa_next = *ifap;
 465	*ifap = ifa;
 466
 467	inet_hash_insert(dev_net(in_dev->dev), ifa);
 468
 469	/* Send message first, then call notifier.
 470	   Notifier will trigger FIB update, so that
 471	   listeners of netlink will know about new ifaddr */
 472	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 473	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 474
 475	return 0;
 476}
 477
 478static int inet_insert_ifa(struct in_ifaddr *ifa)
 479{
 480	return __inet_insert_ifa(ifa, NULL, 0);
 481}
 482
 483static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 484{
 485	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 486
 487	ASSERT_RTNL();
 488
 489	if (!in_dev) {
 490		inet_free_ifa(ifa);
 491		return -ENOBUFS;
 492	}
 493	ipv4_devconf_setall(in_dev);
 494	if (ifa->ifa_dev != in_dev) {
 495		WARN_ON(ifa->ifa_dev);
 496		in_dev_hold(in_dev);
 497		ifa->ifa_dev = in_dev;
 498	}
 499	if (ipv4_is_loopback(ifa->ifa_local))
 500		ifa->ifa_scope = RT_SCOPE_HOST;
 501	return inet_insert_ifa(ifa);
 502}
 503
 504/* Caller must hold RCU or RTNL :
 505 * We dont take a reference on found in_device
 506 */
 507struct in_device *inetdev_by_index(struct net *net, int ifindex)
 508{
 509	struct net_device *dev;
 510	struct in_device *in_dev = NULL;
 511
 512	rcu_read_lock();
 513	dev = dev_get_by_index_rcu(net, ifindex);
 514	if (dev)
 515		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 516	rcu_read_unlock();
 517	return in_dev;
 518}
 519EXPORT_SYMBOL(inetdev_by_index);
 520
 521/* Called only from RTNL semaphored context. No locks. */
 522
 523struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 524				    __be32 mask)
 525{
 526	ASSERT_RTNL();
 527
 528	for_primary_ifa(in_dev) {
 529		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 530			return ifa;
 531	} endfor_ifa(in_dev);
 532	return NULL;
 533}
 534
 535static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 536{
 537	struct net *net = sock_net(skb->sk);
 538	struct nlattr *tb[IFA_MAX+1];
 539	struct in_device *in_dev;
 540	struct ifaddrmsg *ifm;
 541	struct in_ifaddr *ifa, **ifap;
 542	int err = -EINVAL;
 543
 544	ASSERT_RTNL();
 545
 546	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 547	if (err < 0)
 548		goto errout;
 549
 550	ifm = nlmsg_data(nlh);
 551	in_dev = inetdev_by_index(net, ifm->ifa_index);
 552	if (in_dev == NULL) {
 553		err = -ENODEV;
 554		goto errout;
 555	}
 556
 557	for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 558	     ifap = &ifa->ifa_next) {
 559		if (tb[IFA_LOCAL] &&
 560		    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 561			continue;
 562
 563		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 564			continue;
 565
 566		if (tb[IFA_ADDRESS] &&
 567		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 568		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 569			continue;
 570
 571		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 572		return 0;
 573	}
 574
 575	err = -EADDRNOTAVAIL;
 576errout:
 577	return err;
 578}
 579
 580static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 581{
 582	struct nlattr *tb[IFA_MAX+1];
 583	struct in_ifaddr *ifa;
 584	struct ifaddrmsg *ifm;
 585	struct net_device *dev;
 586	struct in_device *in_dev;
 587	int err;
 588
 589	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 590	if (err < 0)
 591		goto errout;
 592
 593	ifm = nlmsg_data(nlh);
 594	err = -EINVAL;
 595	if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 596		goto errout;
 597
 598	dev = __dev_get_by_index(net, ifm->ifa_index);
 599	err = -ENODEV;
 600	if (dev == NULL)
 601		goto errout;
 602
 603	in_dev = __in_dev_get_rtnl(dev);
 604	err = -ENOBUFS;
 605	if (in_dev == NULL)
 606		goto errout;
 607
 608	ifa = inet_alloc_ifa();
 609	if (ifa == NULL)
 610		/*
 611		 * A potential indev allocation can be left alive, it stays
 612		 * assigned to its device and is destroy with it.
 613		 */
 614		goto errout;
 615
 616	ipv4_devconf_setall(in_dev);
 617	in_dev_hold(in_dev);
 618
 619	if (tb[IFA_ADDRESS] == NULL)
 620		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 621
 622	INIT_HLIST_NODE(&ifa->hash);
 623	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 624	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 625	ifa->ifa_flags = ifm->ifa_flags;
 626	ifa->ifa_scope = ifm->ifa_scope;
 627	ifa->ifa_dev = in_dev;
 628
 629	ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 630	ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 631
 632	if (tb[IFA_BROADCAST])
 633		ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 634
 635	if (tb[IFA_LABEL])
 636		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 637	else
 638		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 639
 640	return ifa;
 641
 642errout:
 643	return ERR_PTR(err);
 644}
 645
 646static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 647{
 648	struct net *net = sock_net(skb->sk);
 649	struct in_ifaddr *ifa;
 650
 651	ASSERT_RTNL();
 652
 653	ifa = rtm_to_ifaddr(net, nlh);
 654	if (IS_ERR(ifa))
 655		return PTR_ERR(ifa);
 656
 657	return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 658}
 659
 660/*
 661 *	Determine a default network mask, based on the IP address.
 662 */
 663
 664static inline int inet_abc_len(__be32 addr)
 665{
 666	int rc = -1;	/* Something else, probably a multicast. */
 667
 668	if (ipv4_is_zeronet(addr))
 669		rc = 0;
 670	else {
 671		__u32 haddr = ntohl(addr);
 672
 673		if (IN_CLASSA(haddr))
 674			rc = 8;
 675		else if (IN_CLASSB(haddr))
 676			rc = 16;
 677		else if (IN_CLASSC(haddr))
 678			rc = 24;
 679	}
 680
 681	return rc;
 682}
 683
 684
 685int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 686{
 687	struct ifreq ifr;
 688	struct sockaddr_in sin_orig;
 689	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 690	struct in_device *in_dev;
 691	struct in_ifaddr **ifap = NULL;
 692	struct in_ifaddr *ifa = NULL;
 693	struct net_device *dev;
 694	char *colon;
 695	int ret = -EFAULT;
 696	int tryaddrmatch = 0;
 697
 698	/*
 699	 *	Fetch the caller's info block into kernel space
 700	 */
 701
 702	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 703		goto out;
 704	ifr.ifr_name[IFNAMSIZ - 1] = 0;
 705
 706	/* save original address for comparison */
 707	memcpy(&sin_orig, sin, sizeof(*sin));
 708
 709	colon = strchr(ifr.ifr_name, ':');
 710	if (colon)
 711		*colon = 0;
 712
 713	dev_load(net, ifr.ifr_name);
 714
 715	switch (cmd) {
 716	case SIOCGIFADDR:	/* Get interface address */
 717	case SIOCGIFBRDADDR:	/* Get the broadcast address */
 718	case SIOCGIFDSTADDR:	/* Get the destination address */
 719	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
 720		/* Note that these ioctls will not sleep,
 721		   so that we do not impose a lock.
 722		   One day we will be forced to put shlock here (I mean SMP)
 723		 */
 724		tryaddrmatch = (sin_orig.sin_family == AF_INET);
 725		memset(sin, 0, sizeof(*sin));
 726		sin->sin_family = AF_INET;
 727		break;
 728
 729	case SIOCSIFFLAGS:
 730		ret = -EACCES;
 731		if (!capable(CAP_NET_ADMIN))
 732			goto out;
 733		break;
 734	case SIOCSIFADDR:	/* Set interface address (and family) */
 735	case SIOCSIFBRDADDR:	/* Set the broadcast address */
 736	case SIOCSIFDSTADDR:	/* Set the destination address */
 737	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 738		ret = -EACCES;
 739		if (!capable(CAP_NET_ADMIN))
 740			goto out;
 741		ret = -EINVAL;
 742		if (sin->sin_family != AF_INET)
 743			goto out;
 744		break;
 745	default:
 746		ret = -EINVAL;
 747		goto out;
 748	}
 749
 750	rtnl_lock();
 751
 752	ret = -ENODEV;
 753	dev = __dev_get_by_name(net, ifr.ifr_name);
 754	if (!dev)
 755		goto done;
 756
 757	if (colon)
 758		*colon = ':';
 759
 760	in_dev = __in_dev_get_rtnl(dev);
 761	if (in_dev) {
 762		if (tryaddrmatch) {
 763			/* Matthias Andree */
 764			/* compare label and address (4.4BSD style) */
 765			/* note: we only do this for a limited set of ioctls
 766			   and only if the original address family was AF_INET.
 767			   This is checked above. */
 768			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 769			     ifap = &ifa->ifa_next) {
 770				if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 771				    sin_orig.sin_addr.s_addr ==
 772							ifa->ifa_local) {
 773					break; /* found */
 774				}
 775			}
 776		}
 777		/* we didn't get a match, maybe the application is
 778		   4.3BSD-style and passed in junk so we fall back to
 779		   comparing just the label */
 780		if (!ifa) {
 781			for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 782			     ifap = &ifa->ifa_next)
 783				if (!strcmp(ifr.ifr_name, ifa->ifa_label))
 784					break;
 785		}
 786	}
 787
 788	ret = -EADDRNOTAVAIL;
 789	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 790		goto done;
 791
 792	switch (cmd) {
 793	case SIOCGIFADDR:	/* Get interface address */
 794		sin->sin_addr.s_addr = ifa->ifa_local;
 795		goto rarok;
 796
 797	case SIOCGIFBRDADDR:	/* Get the broadcast address */
 798		sin->sin_addr.s_addr = ifa->ifa_broadcast;
 799		goto rarok;
 800
 801	case SIOCGIFDSTADDR:	/* Get the destination address */
 802		sin->sin_addr.s_addr = ifa->ifa_address;
 803		goto rarok;
 804
 805	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
 806		sin->sin_addr.s_addr = ifa->ifa_mask;
 807		goto rarok;
 808
 809	case SIOCSIFFLAGS:
 810		if (colon) {
 811			ret = -EADDRNOTAVAIL;
 812			if (!ifa)
 813				break;
 814			ret = 0;
 815			if (!(ifr.ifr_flags & IFF_UP))
 816				inet_del_ifa(in_dev, ifap, 1);
 817			break;
 818		}
 819		ret = dev_change_flags(dev, ifr.ifr_flags);
 820		break;
 821
 822	case SIOCSIFADDR:	/* Set interface address (and family) */
 823		ret = -EINVAL;
 824		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 825			break;
 826
 827		if (!ifa) {
 828			ret = -ENOBUFS;
 829			ifa = inet_alloc_ifa();
 830			INIT_HLIST_NODE(&ifa->hash);
 831			if (!ifa)
 832				break;
 833			if (colon)
 834				memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
 835			else
 836				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 837		} else {
 838			ret = 0;
 839			if (ifa->ifa_local == sin->sin_addr.s_addr)
 840				break;
 841			inet_del_ifa(in_dev, ifap, 0);
 842			ifa->ifa_broadcast = 0;
 843			ifa->ifa_scope = 0;
 844		}
 845
 846		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
 847
 848		if (!(dev->flags & IFF_POINTOPOINT)) {
 849			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
 850			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
 851			if ((dev->flags & IFF_BROADCAST) &&
 852			    ifa->ifa_prefixlen < 31)
 853				ifa->ifa_broadcast = ifa->ifa_address |
 854						     ~ifa->ifa_mask;
 855		} else {
 856			ifa->ifa_prefixlen = 32;
 857			ifa->ifa_mask = inet_make_mask(32);
 858		}
 859		ret = inet_set_ifa(dev, ifa);
 860		break;
 861
 862	case SIOCSIFBRDADDR:	/* Set the broadcast address */
 863		ret = 0;
 864		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
 865			inet_del_ifa(in_dev, ifap, 0);
 866			ifa->ifa_broadcast = sin->sin_addr.s_addr;
 867			inet_insert_ifa(ifa);
 868		}
 869		break;
 870
 871	case SIOCSIFDSTADDR:	/* Set the destination address */
 872		ret = 0;
 873		if (ifa->ifa_address == sin->sin_addr.s_addr)
 874			break;
 875		ret = -EINVAL;
 876		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 877			break;
 878		ret = 0;
 879		inet_del_ifa(in_dev, ifap, 0);
 880		ifa->ifa_address = sin->sin_addr.s_addr;
 881		inet_insert_ifa(ifa);
 882		break;
 883
 884	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
 885
 886		/*
 887		 *	The mask we set must be legal.
 888		 */
 889		ret = -EINVAL;
 890		if (bad_mask(sin->sin_addr.s_addr, 0))
 891			break;
 892		ret = 0;
 893		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
 894			__be32 old_mask = ifa->ifa_mask;
 895			inet_del_ifa(in_dev, ifap, 0);
 896			ifa->ifa_mask = sin->sin_addr.s_addr;
 897			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
 898
 899			/* See if current broadcast address matches
 900			 * with current netmask, then recalculate
 901			 * the broadcast address. Otherwise it's a
 902			 * funny address, so don't touch it since
 903			 * the user seems to know what (s)he's doing...
 904			 */
 905			if ((dev->flags & IFF_BROADCAST) &&
 906			    (ifa->ifa_prefixlen < 31) &&
 907			    (ifa->ifa_broadcast ==
 908			     (ifa->ifa_local|~old_mask))) {
 909				ifa->ifa_broadcast = (ifa->ifa_local |
 910						      ~sin->sin_addr.s_addr);
 911			}
 912			inet_insert_ifa(ifa);
 913		}
 914		break;
 915	}
 916done:
 917	rtnl_unlock();
 918out:
 919	return ret;
 920rarok:
 921	rtnl_unlock();
 922	ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
 923	goto out;
 924}
 925
 926static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 927{
 928	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 929	struct in_ifaddr *ifa;
 930	struct ifreq ifr;
 931	int done = 0;
 932
 933	if (!in_dev)
 934		goto out;
 935
 936	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 937		if (!buf) {
 938			done += sizeof(ifr);
 939			continue;
 940		}
 941		if (len < (int) sizeof(ifr))
 942			break;
 943		memset(&ifr, 0, sizeof(struct ifreq));
 944		if (ifa->ifa_label)
 945			strcpy(ifr.ifr_name, ifa->ifa_label);
 946		else
 947			strcpy(ifr.ifr_name, dev->name);
 948
 949		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
 950		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
 951								ifa->ifa_local;
 952
 953		if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
 954			done = -EFAULT;
 955			break;
 956		}
 957		buf  += sizeof(struct ifreq);
 958		len  -= sizeof(struct ifreq);
 959		done += sizeof(struct ifreq);
 960	}
 961out:
 962	return done;
 963}
 964
 965__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 966{
 967	__be32 addr = 0;
 968	struct in_device *in_dev;
 969	struct net *net = dev_net(dev);
 970
 971	rcu_read_lock();
 972	in_dev = __in_dev_get_rcu(dev);
 973	if (!in_dev)
 974		goto no_in_dev;
 975
 976	for_primary_ifa(in_dev) {
 977		if (ifa->ifa_scope > scope)
 978			continue;
 979		if (!dst || inet_ifa_match(dst, ifa)) {
 980			addr = ifa->ifa_local;
 981			break;
 982		}
 983		if (!addr)
 984			addr = ifa->ifa_local;
 985	} endfor_ifa(in_dev);
 986
 987	if (addr)
 988		goto out_unlock;
 989no_in_dev:
 990
 991	/* Not loopback addresses on loopback should be preferred
 992	   in this case. It is importnat that lo is the first interface
 993	   in dev_base list.
 994	 */
 995	for_each_netdev_rcu(net, dev) {
 996		in_dev = __in_dev_get_rcu(dev);
 997		if (!in_dev)
 998			continue;
 999
1000		for_primary_ifa(in_dev) {
1001			if (ifa->ifa_scope != RT_SCOPE_LINK &&
1002			    ifa->ifa_scope <= scope) {
1003				addr = ifa->ifa_local;
1004				goto out_unlock;
1005			}
1006		} endfor_ifa(in_dev);
1007	}
1008out_unlock:
1009	rcu_read_unlock();
1010	return addr;
1011}
1012EXPORT_SYMBOL(inet_select_addr);
1013
/*
 * Per-device worker for inet_confirm_addr().
 *
 * Walks every address of @in_dev looking for two things at once:
 *  - 'addr': the first address that equals @local (any address if @local
 *    is zero) and whose scope value does not exceed @scope;
 *  - 'same': an address for which @local (if non-zero) and @dst (if
 *    non-zero) both satisfy inet_ifa_match().
 *
 * Returns 'addr' if a 'same' entry was found, 0 otherwise.
 */
static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
			      __be32 local, int scope)
{
	int same = 0;
	__be32 addr = 0;

	for_ifa(in_dev) {
		/* Candidate source address: only the first one is taken. */
		if (!addr &&
		    (local == ifa->ifa_local || !local) &&
		    ifa->ifa_scope <= scope) {
			addr = ifa->ifa_local;
			/* Subnet condition already satisfied earlier. */
			if (same)
				break;
		}
		if (!same) {
			same = (!local || inet_ifa_match(local, ifa)) &&
				(!dst || inet_ifa_match(dst, ifa));
			if (same && addr) {
				/* Explicit local, or no dst constraint: done. */
				if (local || !dst)
					break;
				/* Is the selected addr into dst subnet? */
				if (inet_ifa_match(addr, ifa))
					break;
				/* No, then can we use new local src? */
				if (ifa->ifa_scope <= scope) {
					addr = ifa->ifa_local;
					break;
				}
				/* search for large dst subnet for addr */
				same = 0;
			}
		}
	} endfor_ifa(in_dev);

	return same ? addr : 0;
}
1050
1051/*
1052 * Confirm that local IP address exists using wildcards:
1053 * - in_dev: only on this interface, 0=any interface
1054 * - dst: only in the same subnet as dst, 0=any dst
1055 * - local: address, 0=autoselect the local address
1056 * - scope: maximum allowed scope value for the local address
1057 */
1058__be32 inet_confirm_addr(struct in_device *in_dev,
1059			 __be32 dst, __be32 local, int scope)
1060{
1061	__be32 addr = 0;
1062	struct net_device *dev;
1063	struct net *net;
1064
1065	if (scope != RT_SCOPE_LINK)
1066		return confirm_addr_indev(in_dev, dst, local, scope);
1067
1068	net = dev_net(in_dev->dev);
1069	rcu_read_lock();
1070	for_each_netdev_rcu(net, dev) {
1071		in_dev = __in_dev_get_rcu(dev);
1072		if (in_dev) {
1073			addr = confirm_addr_indev(in_dev, dst, local, scope);
1074			if (addr)
1075				break;
1076		}
1077	}
1078	rcu_read_unlock();
1079
1080	return addr;
1081}
1082
1083/*
1084 *	Device notifier
1085 */
1086
1087int register_inetaddr_notifier(struct notifier_block *nb)
1088{
1089	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1090}
1091EXPORT_SYMBOL(register_inetaddr_notifier);
1092
1093int unregister_inetaddr_notifier(struct notifier_block *nb)
1094{
1095	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1096}
1097EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099/* Rename ifa_labels for a device name change. Make some effort to preserve
1100 * existing alias numbering and to create unique labels if possible.
1101*/
1102static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103{
1104	struct in_ifaddr *ifa;
1105	int named = 0;
1106
1107	for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108		char old[IFNAMSIZ], *dot;
1109
1110		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112		if (named++ == 0)
1113			goto skip;
1114		dot = strchr(old, ':');
1115		if (dot == NULL) {
1116			sprintf(old, ":%d", named);
1117			dot = old;
1118		}
1119		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120			strcat(ifa->ifa_label, dot);
1121		else
1122			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123skip:
1124		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125	}
1126}
1127
/* An MTU is acceptable for IPv4 on a device only when it is at least 68. */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1132
1133static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134					struct in_device *in_dev)
1135
1136{
1137	struct in_ifaddr *ifa;
1138
1139	for (ifa = in_dev->ifa_list; ifa;
1140	     ifa = ifa->ifa_next) {
1141		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1142			 ifa->ifa_local, dev,
1143			 ifa->ifa_local, NULL,
1144			 dev->dev_addr, NULL);
1145	}
1146}
1147
/* Called only under RTNL semaphore */

/*
 * Netdevice notifier callback for the IPv4 per-device state.
 *
 * @this:  the notifier block (unused here)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Returns NOTIFY_DONE, except when the in_device cannot be created on
 * NETDEV_REGISTER, in which case -ENOMEM is reported through
 * notifier_from_errno().
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	/* No IPv4 state yet: only REGISTER and CHANGEMTU can create it. */
	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* REGISTER for a device that already has an in_device is
		 * unexpected: log it and clear the pointer.
		 */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		rcu_assign_pointer(dev->ip_ptr, NULL);
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		/* A loopback device gets 127.0.0.1/8 with host scope. */
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa = inet_alloc_ifa();

			if (ifa) {
				INIT_HLIST_NODE(&ifa->hash);
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		/* UP and CHANGEADDR announce only when arp_notify is set. */
		if (!IN_DEV_ARP_NOTIFY(in_dev))
			break;
		/* fall through */
	case NETDEV_NOTIFY_PEERS:
		/* Send gratuitous ARP to notify of link change */
		inetdev_send_gratuitous_arp(dev, in_dev);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_PRE_TYPE_CHANGE:
		ip_mc_unmap(in_dev);
		break;
	case NETDEV_POST_TYPE_CHANGE:
		ip_mc_remap(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		/* NOTE(review): inetdev_changename() does send RTM_NEWADDR
		 * for every address, which seems at odds with the comment
		 * above - confirm which one is intended.
		 */
		inetdev_changename(dev, in_dev);

		/* Re-register so the sysctl directory follows the new name. */
		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}
1238
/* Notifier block that routes netdevice events to inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
	.notifier_call = inetdev_event,
};
1242
1243static inline size_t inet_nlmsg_size(void)
1244{
1245	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1246	       + nla_total_size(4) /* IFA_ADDRESS */
1247	       + nla_total_size(4) /* IFA_LOCAL */
1248	       + nla_total_size(4) /* IFA_BROADCAST */
1249	       + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1250}
1251
1252static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1253			    u32 pid, u32 seq, int event, unsigned int flags)
1254{
1255	struct ifaddrmsg *ifm;
1256	struct nlmsghdr  *nlh;
1257
1258	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1259	if (nlh == NULL)
1260		return -EMSGSIZE;
1261
1262	ifm = nlmsg_data(nlh);
1263	ifm->ifa_family = AF_INET;
1264	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1265	ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1266	ifm->ifa_scope = ifa->ifa_scope;
1267	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1268
1269	if (ifa->ifa_address)
1270		NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1271
1272	if (ifa->ifa_local)
1273		NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1274
1275	if (ifa->ifa_broadcast)
1276		NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1277
1278	if (ifa->ifa_label[0])
1279		NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1280
1281	return nlmsg_end(skb, nlh);
1282
1283nla_put_failure:
1284	nlmsg_cancel(skb, nlh);
1285	return -EMSGSIZE;
1286}
1287
/*
 * Netlink dump callback: emit an RTM_NEWADDR message (NLM_F_MULTI) for
 * every IPv4 address of every device in the requesting socket's namespace.
 *
 * The dump is resumable.  The position is kept in cb->args[]:
 *   args[0] - bucket index into net->dev_index_head
 *   args[1] - index of the device within that bucket
 *   args[2] - index of the address within that device
 *
 * Returns skb->len.
 */
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int h, s_h;
	int idx, s_idx;
	int ip_idx, s_ip_idx;
	struct net_device *dev;
	struct in_device *in_dev;
	struct in_ifaddr *ifa;
	struct hlist_head *head;
	struct hlist_node *node;

	/* Pick up where the previous invocation stopped. */
	s_h = cb->args[0];
	s_idx = idx = cb->args[1];
	s_ip_idx = ip_idx = cb->args[2];

	/* s_idx only applies to the first bucket visited; it is reset to 0
	 * when moving on to the next one.
	 */
	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		idx = 0;
		head = &net->dev_index_head[h];
		rcu_read_lock();
		hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
			/* Skip devices already dumped in an earlier pass. */
			if (idx < s_idx)
				goto cont;
			/* Past the resume point: start at the first address. */
			if (h > s_h || idx > s_idx)
				s_ip_idx = 0;
			in_dev = __in_dev_get_rcu(dev);
			if (!in_dev)
				goto cont;

			for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
			     ifa = ifa->ifa_next, ip_idx++) {
				if (ip_idx < s_ip_idx)
					continue;
				/* Stop at the first message that cannot be
				 * added; h/idx/ip_idx then identify the
				 * address to retry.
				 */
				if (inet_fill_ifaddr(skb, ifa,
					     NETLINK_CB(cb->skb).pid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWADDR, NLM_F_MULTI) <= 0) {
					rcu_read_unlock();
					goto done;
				}
			}
cont:
			idx++;
		}
		rcu_read_unlock();
	}

done:
	/* Save the cursor for the next invocation. */
	cb->args[0] = h;
	cb->args[1] = idx;
	cb->args[2] = ip_idx;

	return skb->len;
}
1342
1343static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1344		      u32 pid)
1345{
1346	struct sk_buff *skb;
1347	u32 seq = nlh ? nlh->nlmsg_seq : 0;
1348	int err = -ENOBUFS;
1349	struct net *net;
1350
1351	net = dev_net(ifa->ifa_dev->dev);
1352	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1353	if (skb == NULL)
1354		goto errout;
1355
1356	err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1357	if (err < 0) {
1358		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1359		WARN_ON(err == -EMSGSIZE);
1360		kfree_skb(skb);
1361		goto errout;
1362	}
1363	rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1364	return;
1365errout:
1366	if (err < 0)
1367		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1368}
1369
1370static size_t inet_get_link_af_size(const struct net_device *dev)
1371{
1372	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1373
1374	if (!in_dev)
1375		return 0;
1376
1377	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1378}
1379
1380static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1381{
1382	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1383	struct nlattr *nla;
1384	int i;
1385
1386	if (!in_dev)
1387		return -ENODATA;
1388
1389	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1390	if (nla == NULL)
1391		return -EMSGSIZE;
1392
1393	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1394		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1395
1396	return 0;
1397}
1398
/* Policy for the AF_INET link attributes: IFLA_INET_CONF must be a nested
 * attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
};
1402
1403static int inet_validate_link_af(const struct net_device *dev,
1404				 const struct nlattr *nla)
1405{
1406	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1407	int err, rem;
1408
1409	if (dev && !__in_dev_get_rtnl(dev))
1410		return -EAFNOSUPPORT;
1411
1412	err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1413	if (err < 0)
1414		return err;
1415
1416	if (tb[IFLA_INET_CONF]) {
1417		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1418			int cfgid = nla_type(a);
1419
1420			if (nla_len(a) < 4)
1421				return -EINVAL;
1422
1423			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1424				return -EINVAL;
1425		}
1426	}
1427
1428	return 0;
1429}
1430
1431static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1432{
1433	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1434	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1435	int rem;
1436
1437	if (!in_dev)
1438		return -EAFNOSUPPORT;
1439
1440	if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1441		BUG();
1442
1443	if (tb[IFLA_INET_CONF]) {
1444		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1445			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1446	}
1447
1448	return 0;
1449}
1450
1451#ifdef CONFIG_SYSCTL
1452
1453static void devinet_copy_dflt_conf(struct net *net, int i)
1454{
1455	struct net_device *dev;
1456
1457	rcu_read_lock();
1458	for_each_netdev_rcu(net, dev) {
1459		struct in_device *in_dev;
1460
1461		in_dev = __in_dev_get_rcu(dev);
1462		if (in_dev && !test_bit(i, in_dev->cnf.state))
1463			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1464	}
1465	rcu_read_unlock();
1466}
1467
1468/* called with RTNL locked */
1469static void inet_forward_change(struct net *net)
1470{
1471	struct net_device *dev;
1472	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1473
1474	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1475	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1476
1477	for_each_netdev(net, dev) {
1478		struct in_device *in_dev;
1479		if (on)
1480			dev_disable_lro(dev);
1481		rcu_read_lock();
1482		in_dev = __in_dev_get_rcu(dev);
1483		if (in_dev)
1484			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1485		rcu_read_unlock();
1486	}
1487}
1488
1489static int devinet_conf_proc(ctl_table *ctl, int write,
1490			     void __user *buffer,
1491			     size_t *lenp, loff_t *ppos)
1492{
1493	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1494
1495	if (write) {
1496		struct ipv4_devconf *cnf = ctl->extra1;
1497		struct net *net = ctl->extra2;
1498		int i = (int *)ctl->data - cnf->data;
1499
1500		set_bit(i, cnf->state);
1501
1502		if (cnf == net->ipv4.devconf_dflt)
1503			devinet_copy_dflt_conf(net, i);
1504	}
1505
1506	return ret;
1507}
1508
1509static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510				  void __user *buffer,
1511				  size_t *lenp, loff_t *ppos)
1512{
1513	int *valp = ctl->data;
1514	int val = *valp;
1515	loff_t pos = *ppos;
1516	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517
1518	if (write && *valp != val) {
1519		struct net *net = ctl->extra2;
1520
1521		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522			if (!rtnl_trylock()) {
1523				/* Restore the original values before restarting */
1524				*valp = val;
1525				*ppos = pos;
1526				return restart_syscall();
1527			}
1528			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529				inet_forward_change(net);
1530			} else if (*valp) {
1531				struct ipv4_devconf *cnf = ctl->extra1;
1532				struct in_device *idev =
1533					container_of(cnf, struct in_device, cnf);
1534				dev_disable_lro(idev->dev);
1535			}
1536			rtnl_unlock();
1537			rt_cache_flush(net, 0);
1538		}
1539	}
1540
1541	return ret;
1542}
1543
1544static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545				void __user *buffer,
1546				size_t *lenp, loff_t *ppos)
1547{
1548	int *valp = ctl->data;
1549	int val = *valp;
1550	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551	struct net *net = ctl->extra2;
1552
1553	if (write && *valp != val)
1554		rt_cache_flush(net, 0);
1555
1556	return ret;
1557}
1558
/*
 * Build one ctl_table initializer for devconf attribute @attr.
 *
 * @attr: suffix of an IPV4_DEVCONF_* constant; the constants are 1-based,
 *        hence the "- 1" when indexing ipv4_devconf.data
 * @name: procname of the entry
 * @mval: file mode
 * @proc: proc_handler
 *
 * .data and .extra1 point into the global ipv4_devconf.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581
/*
 * Template for a devconf sysctl directory.
 *
 * The entries below refer to the global ipv4_devconf.
 * __devinet_sysctl_register() duplicates this template and redirects the
 * copy's entries to the ipv4_devconf instance being registered.
 */
static struct devinet_sysctl_table {
	struct ctl_table_header *sysctl_header;	/* handle of the registered table */
	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
	char *dev_name;				/* private copy of the directory name */
} devinet_sysctl = {
	.devinet_vars = {
		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
					     devinet_sysctl_forward),
		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
					"accept_source_route"),
		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

		/* Changing any of these flushes the route cache. */
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
					      "force_igmp_version"),
		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
					      "promote_secondaries"),
	},
};
1621
1622static int __devinet_sysctl_register(struct net *net, char *dev_name,
1623					struct ipv4_devconf *p)
1624{
1625	int i;
1626	struct devinet_sysctl_table *t;
1627
1628#define DEVINET_CTL_PATH_DEV	3
1629
1630	struct ctl_path devinet_ctl_path[] = {
1631		{ .procname = "net",  },
1632		{ .procname = "ipv4", },
1633		{ .procname = "conf", },
1634		{ /* to be set */ },
1635		{ },
1636	};
1637
1638	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1639	if (!t)
1640		goto out;
1641
1642	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1643		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1644		t->devinet_vars[i].extra1 = p;
1645		t->devinet_vars[i].extra2 = net;
1646	}
1647
1648	/*
1649	 * Make a copy of dev_name, because '.procname' is regarded as const
1650	 * by sysctl and we wouldn't want anyone to change it under our feet
1651	 * (see SIOCSIFNAME).
1652	 */
1653	t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1654	if (!t->dev_name)
1655		goto free;
1656
1657	devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1658
1659	t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1660			t->devinet_vars);
1661	if (!t->sysctl_header)
1662		goto free_procname;
1663
1664	p->sysctl = t;
1665	return 0;
1666
1667free_procname:
1668	kfree(t->dev_name);
1669free:
1670	kfree(t);
1671out:
1672	return -ENOBUFS;
1673}
1674
1675static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1676{
1677	struct devinet_sysctl_table *t = cnf->sysctl;
1678
1679	if (t == NULL)
1680		return;
1681
1682	cnf->sysctl = NULL;
1683	unregister_net_sysctl_table(t->sysctl_header);
1684	kfree(t->dev_name);
1685	kfree(t);
1686}
1687
1688static void devinet_sysctl_register(struct in_device *idev)
1689{
1690	neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1691	__devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1692					&idev->cnf);
1693}
1694
1695static void devinet_sysctl_unregister(struct in_device *idev)
1696{
1697	__devinet_sysctl_unregister(&idev->cnf);
1698	neigh_sysctl_unregister(idev->arp_parms);
1699}
1700
/* "ip_forward" sysctl entry, backed by the forwarding slot of the global
 * ipv4_devconf.  devinet_init_net() uses it directly for init_net and
 * gives every other namespace a redirected copy.
 */
static struct ctl_table ctl_forward_entry[] = {
	{
		.procname	= "ip_forward",
		.data		= &ipv4_devconf.data[
					IPV4_DEVCONF_FORWARDING - 1],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= devinet_sysctl_forward,
		.extra1		= &ipv4_devconf,
		.extra2		= &init_net,
	},
	{ },
};
1714
/* sysctl path "net/ipv4", under which ctl_forward_entry is registered. */
static __net_initdata struct ctl_path net_ipv4_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ },
};
1720#endif
1721
/*
 * Per-namespace initialisation.
 *
 * init_net uses the static ipv4_devconf / ipv4_devconf_dflt (and the
 * static ctl_forward_entry); every other namespace gets kmemdup()ed
 * copies.  With CONFIG_SYSCTL the "all" and "default" conf directories
 * and the ip_forward entry are registered as well.
 *
 * Returns 0 on success or a negative errno; on failure everything set up
 * so far is undone in reverse order.
 */
static __net_init int devinet_init_net(struct net *net)
{
	int err;
	struct ipv4_devconf *all, *dflt;
#ifdef CONFIG_SYSCTL
	struct ctl_table *tbl = ctl_forward_entry;
	struct ctl_table_header *forw_hdr;
#endif

	err = -ENOMEM;
	all = &ipv4_devconf;
	dflt = &ipv4_devconf_dflt;

	if (!net_eq(net, &init_net)) {
		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
		if (all == NULL)
			goto err_alloc_all;

		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
		if (dflt == NULL)
			goto err_alloc_dflt;

#ifdef CONFIG_SYSCTL
		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
		if (tbl == NULL)
			goto err_alloc_ctl;

		/* Redirect the copied ip_forward entry to this namespace. */
		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
		tbl[0].extra1 = all;
		tbl[0].extra2 = net;
#endif
	}

#ifdef CONFIG_SYSCTL
	err = __devinet_sysctl_register(net, "all", all);
	if (err < 0)
		goto err_reg_all;

	err = __devinet_sysctl_register(net, "default", dflt);
	if (err < 0)
		goto err_reg_dflt;

	err = -ENOMEM;
	forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
	if (forw_hdr == NULL)
		goto err_reg_ctl;
	net->ipv4.forw_hdr = forw_hdr;
#endif

	net->ipv4.devconf_all = all;
	net->ipv4.devconf_dflt = dflt;
	return 0;

	/* Unwind ladder; the pointer comparisons keep the static init_net
	 * objects from being passed to kfree().
	 */
#ifdef CONFIG_SYSCTL
err_reg_ctl:
	__devinet_sysctl_unregister(dflt);
err_reg_dflt:
	__devinet_sysctl_unregister(all);
err_reg_all:
	if (tbl != ctl_forward_entry)
		kfree(tbl);
err_alloc_ctl:
#endif
	if (dflt != &ipv4_devconf_dflt)
		kfree(dflt);
err_alloc_dflt:
	if (all != &ipv4_devconf)
		kfree(all);
err_alloc_all:
	return err;
}
1793
1794static __net_exit void devinet_exit_net(struct net *net)
1795{
1796#ifdef CONFIG_SYSCTL
1797	struct ctl_table *tbl;
1798
1799	tbl = net->ipv4.forw_hdr->ctl_table_arg;
1800	unregister_net_sysctl_table(net->ipv4.forw_hdr);
1801	__devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1802	__devinet_sysctl_unregister(net->ipv4.devconf_all);
1803	kfree(tbl);
1804#endif
1805	kfree(net->ipv4.devconf_dflt);
1806	kfree(net->ipv4.devconf_all);
1807}
1808
/* Per-namespace init/exit hooks, registered by devinet_init(). */
static __net_initdata struct pernet_operations devinet_ops = {
	.init = devinet_init_net,
	.exit = devinet_exit_net,
};
1813
/* AF_INET callbacks for the address-family specific part of rtnetlink
 * link messages, registered by devinet_init().
 */
static struct rtnl_af_ops inet_af_ops = {
	.family		  = AF_INET,
	.fill_link_af	  = inet_fill_link_af,
	.get_link_af_size = inet_get_link_af_size,
	.validate_link_af = inet_validate_link_af,
	.set_link_af	  = inet_set_link_af,
};
1821
1822void __init devinet_init(void)
1823{
1824	int i;
1825
1826	for (i = 0; i < IN4_ADDR_HSIZE; i++)
1827		INIT_HLIST_HEAD(&inet_addr_lst[i]);
1828
1829	register_pernet_subsys(&devinet_ops);
1830
1831	register_gifconf(PF_INET, inet_gifconf);
1832	register_netdevice_notifier(&ip_netdev_notifier);
1833
1834	rtnl_af_register(&inet_af_ops);
1835
1836	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1837	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1838	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1839}
1840
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *	NET3	IP device support routines.
   4 *
   5 *	Derived from the IP parts of dev.c 1.0.19
   6 * 		Authors:	Ross Biro
   7 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   8 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
   9 *
  10 *	Additional Authors:
  11 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
  12 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  13 *
  14 *	Changes:
  15 *		Alexey Kuznetsov:	pa_* fields are replaced with ifaddr
  16 *					lists.
  17 *		Cyrus Durgin:		updated for kmod
  18 *		Matthias Andree:	in devinet_ioctl, compare label and
  19 *					address (4.4BSD alias style support),
  20 *					fall back to comparing just the label
  21 *					if no match found.
  22 */
  23
  24
  25#include <linux/uaccess.h>
  26#include <linux/bitops.h>
  27#include <linux/capability.h>
  28#include <linux/module.h>
  29#include <linux/types.h>
  30#include <linux/kernel.h>
  31#include <linux/sched/signal.h>
  32#include <linux/string.h>
  33#include <linux/mm.h>
  34#include <linux/socket.h>
  35#include <linux/sockios.h>
  36#include <linux/in.h>
  37#include <linux/errno.h>
  38#include <linux/interrupt.h>
  39#include <linux/if_addr.h>
  40#include <linux/if_ether.h>
  41#include <linux/inet.h>
  42#include <linux/netdevice.h>
  43#include <linux/etherdevice.h>
  44#include <linux/skbuff.h>
  45#include <linux/init.h>
  46#include <linux/notifier.h>
  47#include <linux/inetdevice.h>
  48#include <linux/igmp.h>
  49#include <linux/slab.h>
  50#include <linux/hash.h>
  51#ifdef CONFIG_SYSCTL
  52#include <linux/sysctl.h>
  53#endif
  54#include <linux/kmod.h>
  55#include <linux/netconf.h>
  56
  57#include <net/arp.h>
  58#include <net/ip.h>
  59#include <net/route.h>
  60#include <net/ip_fib.h>
  61#include <net/rtnetlink.h>
  62#include <net/net_namespace.h>
  63#include <net/addrconf.h>
  64
/* Mask of IFA_F_* address flags grouped as IPv6-only.
 * NOTE(review): the user of this mask is outside this chunk; presumably
 * these bits are stripped from IPv4 addresses - confirm at the use site.
 */
#define IPV6ONLY_FLAGS	\
		(IFA_F_NODAD | IFA_F_OPTIMISTIC | IFA_F_DADFAILED | \
		 IFA_F_HOMEADDRESS | IFA_F_TENTATIVE | \
		 IFA_F_MANAGETEMPADDR | IFA_F_STABLE_PRIVACY)
  69
/* Initial values of the global ipv4_devconf.  IPV4_DEVCONF_* constants
 * are 1-based, hence the "- 1" in every index; unlisted entries are 0.
 */
static struct ipv4_devconf ipv4_devconf = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
  80
/* Initial values of the default devconf, which inetdev_init() copies into
 * each new in_device.  Differs from ipv4_devconf above only in enabling
 * ACCEPT_SOURCE_ROUTE.
 */
static struct ipv4_devconf ipv4_devconf_dflt = {
	.data = {
		[IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
	},
};
  92
/* Accessor for attribute @attr of namespace @net's default devconf. */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  95
/* Netlink attribute policy for IPv4 address (IFA_*) attributes. */
static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
	[IFA_LOCAL]     	= { .type = NLA_U32 },
	[IFA_ADDRESS]   	= { .type = NLA_U32 },
	[IFA_BROADCAST] 	= { .type = NLA_U32 },
	/* .len excludes the terminating NUL, hence IFNAMSIZ - 1 */
	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
	[IFA_FLAGS]		= { .type = NLA_U32 },
	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
	[IFA_TARGET_NETNSID]	= { .type = NLA_S32 },
};
 106
/* Parameter bundle for building address netlink messages.
 * NOTE(review): the consumers of this struct are outside this chunk; the
 * field notes below are inferred from the names - confirm at the use site.
 */
struct inet_fill_args {
	u32 portid;		/* presumably the destination netlink port id */
	u32 seq;		/* presumably the netlink sequence number */
	int event;		/* presumably the RTM_* message type */
	unsigned int flags;	/* presumably NLM_F_* header flags */
	int netnsid;		/* presumably a target netns id */
	int ifindex;		/* presumably an interface index filter */
};
 115
/* Hash table of IPv4 addresses with 2^IN4_ADDR_HSIZE_SHIFT buckets,
 * indexed by inet_addr_hash().
 */
#define IN4_ADDR_HSIZE_SHIFT	8
#define IN4_ADDR_HSIZE		(1U << IN4_ADDR_HSIZE_SHIFT)

static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 120
 121static u32 inet_addr_hash(const struct net *net, __be32 addr)
 122{
 123	u32 val = (__force u32) addr ^ net_hash_mix(net);
 124
 125	return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 126}
 127
 128static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 129{
 130	u32 hash = inet_addr_hash(net, ifa->ifa_local);
 131
 132	ASSERT_RTNL();
 133	hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 134}
 135
 136static void inet_hash_remove(struct in_ifaddr *ifa)
 137{
 138	ASSERT_RTNL();
 139	hlist_del_init_rcu(&ifa->hash);
 140}
 141
 142/**
 143 * __ip_dev_find - find the first device with a given source address.
 144 * @net: the net namespace
 145 * @addr: the source address
 146 * @devref: if true, take a reference on the found device
 147 *
 148 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 149 */
 150struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 151{
 152	struct net_device *result = NULL;
 153	struct in_ifaddr *ifa;
 154
 155	rcu_read_lock();
 156	ifa = inet_lookup_ifaddr_rcu(net, addr);
 157	if (!ifa) {
 158		struct flowi4 fl4 = { .daddr = addr };
 159		struct fib_result res = { 0 };
 160		struct fib_table *local;
 161
 162		/* Fallback to FIB local table so that communication
 163		 * over loopback subnets work.
 164		 */
 165		local = fib_get_table(net, RT_TABLE_LOCAL);
 166		if (local &&
 167		    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 168		    res.type == RTN_LOCAL)
 169			result = FIB_RES_DEV(res);
 170	} else {
 171		result = ifa->ifa_dev->dev;
 172	}
 173	if (result && devref)
 174		dev_hold(result);
 175	rcu_read_unlock();
 176	return result;
 177}
 178EXPORT_SYMBOL(__ip_dev_find);
 179
 180/* called under RCU lock */
 181struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
 182{
 183	u32 hash = inet_addr_hash(net, addr);
 184	struct in_ifaddr *ifa;
 185
 186	hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
 187		if (ifa->ifa_local == addr &&
 188		    net_eq(dev_net(ifa->ifa_dev->dev), net))
 189			return ifa;
 190
 191	return NULL;
 192}
 193
/* Forward declaration; the definition is further down in the file. */
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);

/* Notifier chains for IPv4 address events. */
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
static BLOCKING_NOTIFIER_HEAD(inetaddr_validator_chain);
static void inet_del_ifa(struct in_device *in_dev,
			 struct in_ifaddr __rcu **ifap,
			 int destroy);
#ifdef CONFIG_SYSCTL
static int devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL registration is a no-op that always succeeds. */
static int devinet_sysctl_register(struct in_device *idev)
{
	return 0;
}
static void devinet_sysctl_unregister(struct in_device *idev)
{
}
#endif
 213
 214/* Locks all the inet devices. */
 215
 216static struct in_ifaddr *inet_alloc_ifa(void)
 217{
 218	return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 219}
 220
 221static void inet_rcu_free_ifa(struct rcu_head *head)
 222{
 223	struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 224	if (ifa->ifa_dev)
 225		in_dev_put(ifa->ifa_dev);
 226	kfree(ifa);
 227}
 228
 229static void inet_free_ifa(struct in_ifaddr *ifa)
 230{
 231	call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 232}
 233
 234void in_dev_finish_destroy(struct in_device *idev)
 235{
 236	struct net_device *dev = idev->dev;
 237
 238	WARN_ON(idev->ifa_list);
 239	WARN_ON(idev->mc_list);
 240	kfree(rcu_dereference_protected(idev->mc_hash, 1));
 241#ifdef NET_REFCNT_DEBUG
 242	pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 243#endif
 244	dev_put(dev);
 245	if (!idev->dead)
 246		pr_err("Freeing alive in_device %p\n", idev);
 247	else
 248		kfree(idev);
 249}
 250EXPORT_SYMBOL(in_dev_finish_destroy);
 251
 252static struct in_device *inetdev_init(struct net_device *dev)
 253{
 254	struct in_device *in_dev;
 255	int err = -ENOMEM;
 256
 257	ASSERT_RTNL();
 258
 259	in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 260	if (!in_dev)
 261		goto out;
 262	memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 263			sizeof(in_dev->cnf));
 264	in_dev->cnf.sysctl = NULL;
 265	in_dev->dev = dev;
 266	in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 267	if (!in_dev->arp_parms)
 268		goto out_kfree;
 269	if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 270		dev_disable_lro(dev);
 271	/* Reference in_dev->dev */
 272	dev_hold(dev);
 273	/* Account for reference dev->ip_ptr (below) */
 274	refcount_set(&in_dev->refcnt, 1);
 275
 276	err = devinet_sysctl_register(in_dev);
 277	if (err) {
 278		in_dev->dead = 1;
 279		in_dev_put(in_dev);
 280		in_dev = NULL;
 281		goto out;
 282	}
 283	ip_mc_init_dev(in_dev);
 284	if (dev->flags & IFF_UP)
 285		ip_mc_up(in_dev);
 286
 287	/* we can receive as soon as ip_ptr is set -- do this last */
 288	rcu_assign_pointer(dev->ip_ptr, in_dev);
 289out:
 290	return in_dev ?: ERR_PTR(err);
 291out_kfree:
 292	kfree(in_dev);
 293	in_dev = NULL;
 294	goto out;
 295}
 296
 297static void in_dev_rcu_put(struct rcu_head *head)
 298{
 299	struct in_device *idev = container_of(head, struct in_device, rcu_head);
 300	in_dev_put(idev);
 301}
 302
 303static void inetdev_destroy(struct in_device *in_dev)
 304{
 305	struct net_device *dev;
 306	struct in_ifaddr *ifa;
 307
 308	ASSERT_RTNL();
 309
 310	dev = in_dev->dev;
 311
 312	in_dev->dead = 1;
 313
 314	ip_mc_destroy_dev(in_dev);
 315
 316	while ((ifa = rtnl_dereference(in_dev->ifa_list)) != NULL) {
 317		inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 318		inet_free_ifa(ifa);
 319	}
 320
 321	RCU_INIT_POINTER(dev->ip_ptr, NULL);
 322
 323	devinet_sysctl_unregister(in_dev);
 324	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 325	arp_ifdown(dev);
 326
 327	call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 328}
 329
 330int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 331{
 332	const struct in_ifaddr *ifa;
 333
 334	rcu_read_lock();
 335	in_dev_for_each_ifa_rcu(ifa, in_dev) {
 336		if (inet_ifa_match(a, ifa)) {
 337			if (!b || inet_ifa_match(b, ifa)) {
 338				rcu_read_unlock();
 339				return 1;
 340			}
 341		}
 342	}
 343	rcu_read_unlock();
 344	return 0;
 345}
 346
/*
 * Remove one address from a device's address list.
 *
 * @in_dev:  device state owning the list
 * @ifap:    the link (list head or a predecessor's ifa_next) that currently
 *           points at the address to delete
 * @destroy: when non-zero, the deleted address is also freed
 * @nlh:     request being served, passed on to rtmsg_ifa() (may be NULL)
 * @portid:  netlink port id passed on to rtmsg_ifa()
 *
 * Deleting a primary address also affects its secondaries (same mask and
 * subnet): without promote_secondaries they are deleted too, otherwise the
 * first one is promoted to primary.  Nothing of this happens when the
 * device is already dead.  The caller must hold RTNL.
 */
static void __inet_del_ifa(struct in_device *in_dev,
			   struct in_ifaddr __rcu **ifap,
			   int destroy, struct nlmsghdr *nlh, u32 portid)
{
	struct in_ifaddr *promote = NULL;	/* secondary chosen for promotion */
	struct in_ifaddr *ifa, *ifa1;		/* ifa1: the address being deleted */
	struct in_ifaddr *last_prim;		/* primary after which promote is reinserted */
	struct in_ifaddr *prev_prom = NULL;	/* last entry skipped before promote */
	int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);

	ASSERT_RTNL();

	ifa1 = rtnl_dereference(*ifap);
	last_prim = rtnl_dereference(in_dev->ifa_list);
	if (in_dev->dead)
		goto no_promotions;

	/* 1. Deleting a primary ifaddr forces deletion of all its
	 * secondaries unless alias promotion is set
	 **/

	if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
		struct in_ifaddr __rcu **ifap1 = &ifa1->ifa_next;

		while ((ifa = rtnl_dereference(*ifap1)) != NULL) {
			/* Remember the last primary whose scope value is not
			 * below ifa1's.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
			    ifa1->ifa_scope <= ifa->ifa_scope)
				last_prim = ifa;

			/* Step over anything that is not a secondary of
			 * ifa1's subnet.
			 */
			if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
			    ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa)) {
				ifap1 = &ifa->ifa_next;
				prev_prom = ifa;
				continue;
			}

			if (!do_promote) {
				/* Unlink, announce and free the secondary. */
				inet_hash_remove(ifa);
				*ifap1 = ifa->ifa_next;

				rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
				blocking_notifier_call_chain(&inetaddr_chain,
						NETDEV_DOWN, ifa);
				inet_free_ifa(ifa);
			} else {
				/* The first matching secondary is promoted. */
				promote = ifa;
				break;
			}
		}
	}

	/* On promotion all secondaries from subnet are changing
	 * the primary IP, we must remove all their routes silently
	 * and later to add them back with new prefsrc. Do this
	 * while all addresses are on the device list.
	 */
	for (ifa = promote; ifa; ifa = rtnl_dereference(ifa->ifa_next)) {
		if (ifa1->ifa_mask == ifa->ifa_mask &&
		    inet_ifa_match(ifa1->ifa_address, ifa))
			fib_del_ifaddr(ifa, ifa1);
	}

no_promotions:
	/* 2. Unlink it */

	*ifap = ifa1->ifa_next;
	inet_hash_remove(ifa1);

	/* 3. Announce address deletion */

	/* Send message first, then call notifier.
	   At first sight, FIB update triggered by notifier
	   will refer to already deleted ifaddr, that could confuse
	   netlink listeners. It is not true: look, gated sees
	   that route deleted and if it still thinks that ifaddr
	   is valid, it will try to restore deleted routes... Grr.
	   So that, this order is correct.
	 */
	rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);

	if (promote) {
		struct in_ifaddr *next_sec;

		next_sec = rtnl_dereference(promote->ifa_next);
		if (prev_prom) {
			struct in_ifaddr *last_sec;

			/* Move promote from its current position to right
			 * after last_prim.
			 */
			rcu_assign_pointer(prev_prom->ifa_next, next_sec);

			last_sec = rtnl_dereference(last_prim->ifa_next);
			rcu_assign_pointer(promote->ifa_next, last_sec);
			rcu_assign_pointer(last_prim->ifa_next, promote);
		}

		promote->ifa_flags &= ~IFA_F_SECONDARY;
		rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
		blocking_notifier_call_chain(&inetaddr_chain,
				NETDEV_UP, promote);
		/* Re-add the routes of the remaining secondaries of the
		 * subnet, which were removed above.
		 */
		for (ifa = next_sec; ifa;
		     ifa = rtnl_dereference(ifa->ifa_next)) {
			if (ifa1->ifa_mask != ifa->ifa_mask ||
			    !inet_ifa_match(ifa1->ifa_address, ifa))
					continue;
			fib_add_ifaddr(ifa);
		}

	}
	if (destroy)
		inet_free_ifa(ifa1);
}
 459
 460static void inet_del_ifa(struct in_device *in_dev,
 461			 struct in_ifaddr __rcu **ifap,
 462			 int destroy)
 463{
 464	__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 465}
 466
/* Forward declaration: check_lifetime() is defined further down but is
 * needed here as the handler of the delayed work item below.
 */
 467static void check_lifetime(struct work_struct *work);
 468
/* Delayed work item whose handler is check_lifetime(). */
 469static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
 470
 471static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 472			     u32 portid, struct netlink_ext_ack *extack)
 473{
 474	struct in_ifaddr __rcu **last_primary, **ifap;
 475	struct in_device *in_dev = ifa->ifa_dev;
 476	struct in_validator_info ivi;
 477	struct in_ifaddr *ifa1;
 478	int ret;
 479
 480	ASSERT_RTNL();
 481
 482	if (!ifa->ifa_local) {
 483		inet_free_ifa(ifa);
 484		return 0;
 485	}
 486
 487	ifa->ifa_flags &= ~IFA_F_SECONDARY;
 488	last_primary = &in_dev->ifa_list;
 489
 490	/* Don't set IPv6 only flags to IPv4 addresses */
 491	ifa->ifa_flags &= ~IPV6ONLY_FLAGS;
 492
 493	ifap = &in_dev->ifa_list;
 494	ifa1 = rtnl_dereference(*ifap);
 495
 496	while (ifa1) {
 497		if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 498		    ifa->ifa_scope <= ifa1->ifa_scope)
 499			last_primary = &ifa1->ifa_next;
 500		if (ifa1->ifa_mask == ifa->ifa_mask &&
 501		    inet_ifa_match(ifa1->ifa_address, ifa)) {
 502			if (ifa1->ifa_local == ifa->ifa_local) {
 503				inet_free_ifa(ifa);
 504				return -EEXIST;
 505			}
 506			if (ifa1->ifa_scope != ifa->ifa_scope) {
 507				inet_free_ifa(ifa);
 508				return -EINVAL;
 509			}
 510			ifa->ifa_flags |= IFA_F_SECONDARY;
 511		}
 512
 513		ifap = &ifa1->ifa_next;
 514		ifa1 = rtnl_dereference(*ifap);
 515	}
 516
 517	/* Allow any devices that wish to register ifaddr validtors to weigh
 518	 * in now, before changes are committed.  The rntl lock is serializing
 519	 * access here, so the state should not change between a validator call
 520	 * and a final notify on commit.  This isn't invoked on promotion under
 521	 * the assumption that validators are checking the address itself, and
 522	 * not the flags.
 523	 */
 524	ivi.ivi_addr = ifa->ifa_address;
 525	ivi.ivi_dev = ifa->ifa_dev;
 526	ivi.extack = extack;
 527	ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
 528					   NETDEV_UP, &ivi);
 529	ret = notifier_to_errno(ret);
 530	if (ret) {
 531		inet_free_ifa(ifa);
 532		return ret;
 533	}
 534
 535	if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 536		prandom_seed((__force u32) ifa->ifa_local);
 537		ifap = last_primary;
 538	}
 539
 540	rcu_assign_pointer(ifa->ifa_next, *ifap);
 541	rcu_assign_pointer(*ifap, ifa);
 542
 543	inet_hash_insert(dev_net(in_dev->dev), ifa);
 544
 545	cancel_delayed_work(&check_lifetime_work);
 546	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
 547
 548	/* Send message first, then call notifier.
 549	   Notifier will trigger FIB update, so that
 550	   listeners of netlink will know about new ifaddr */
 551	rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 552	blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 553
 554	return 0;
 555}
 556
 557static int inet_insert_ifa(struct in_ifaddr *ifa)
 558{
 559	return __inet_insert_ifa(ifa, NULL, 0, NULL);
 560}
 561
 562static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 563{
 564	struct in_device *in_dev = __in_dev_get_rtnl(dev);
 565
 566	ASSERT_RTNL();
 567
 568	if (!in_dev) {
 569		inet_free_ifa(ifa);
 570		return -ENOBUFS;
 571	}
 572	ipv4_devconf_setall(in_dev);
 573	neigh_parms_data_state_setall(in_dev->arp_parms);
 574	if (ifa->ifa_dev != in_dev) {
 575		WARN_ON(ifa->ifa_dev);
 576		in_dev_hold(in_dev);
 577		ifa->ifa_dev = in_dev;
 578	}
 579	if (ipv4_is_loopback(ifa->ifa_local))
 580		ifa->ifa_scope = RT_SCOPE_HOST;
 581	return inet_insert_ifa(ifa);
 582}
 583
 584/* Caller must hold RCU or RTNL :
 585 * We dont take a reference on found in_device
 586 */
 587struct in_device *inetdev_by_index(struct net *net, int ifindex)
 588{
 589	struct net_device *dev;
 590	struct in_device *in_dev = NULL;
 591
 592	rcu_read_lock();
 593	dev = dev_get_by_index_rcu(net, ifindex);
 594	if (dev)
 595		in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 596	rcu_read_unlock();
 597	return in_dev;
 598}
 599EXPORT_SYMBOL(inetdev_by_index);
 600
 601/* Called only from RTNL semaphored context. No locks. */
 602
 603struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 604				    __be32 mask)
 605{
 606	struct in_ifaddr *ifa;
 607
 608	ASSERT_RTNL();
 609
 610	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 611		if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 612			return ifa;
 613	}
 614	return NULL;
 615}
 616
 617static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
 618{
 619	struct ip_mreqn mreq = {
 620		.imr_multiaddr.s_addr = ifa->ifa_address,
 621		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
 622	};
 623	int ret;
 624
 625	ASSERT_RTNL();
 626
 627	lock_sock(sk);
 628	if (join)
 629		ret = ip_mc_join_group(sk, &mreq);
 630	else
 631		ret = ip_mc_leave_group(sk, &mreq);
 632	release_sock(sk);
 633
 634	return ret;
 635}
 636
 637static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 638			    struct netlink_ext_ack *extack)
 639{
 640	struct net *net = sock_net(skb->sk);
 641	struct in_ifaddr __rcu **ifap;
 642	struct nlattr *tb[IFA_MAX+1];
 643	struct in_device *in_dev;
 644	struct ifaddrmsg *ifm;
 645	struct in_ifaddr *ifa;
 646
 647	int err = -EINVAL;
 648
 649	ASSERT_RTNL();
 650
 651	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
 652				     ifa_ipv4_policy, extack);
 653	if (err < 0)
 654		goto errout;
 655
 656	ifm = nlmsg_data(nlh);
 657	in_dev = inetdev_by_index(net, ifm->ifa_index);
 658	if (!in_dev) {
 659		err = -ENODEV;
 660		goto errout;
 661	}
 662
 663	for (ifap = &in_dev->ifa_list; (ifa = rtnl_dereference(*ifap)) != NULL;
 664	     ifap = &ifa->ifa_next) {
 665		if (tb[IFA_LOCAL] &&
 666		    ifa->ifa_local != nla_get_in_addr(tb[IFA_LOCAL]))
 667			continue;
 668
 669		if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 670			continue;
 671
 672		if (tb[IFA_ADDRESS] &&
 673		    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 674		    !inet_ifa_match(nla_get_in_addr(tb[IFA_ADDRESS]), ifa)))
 675			continue;
 676
 677		if (ipv4_is_multicast(ifa->ifa_address))
 678			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 679		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 680		return 0;
 681	}
 682
 683	err = -EADDRNOTAVAIL;
 684errout:
 685	return err;
 686}
 687
/* Lifetime value meaning "never expires": check_lifetime() does not expire
 * or deprecate on a valid/preferred lifetime equal to this, and
 * inet_rtm_newaddr() uses it as the default for both lifetimes.
 */
 688#define INFINITY_LIFE_TIME	0xFFFFFFFF
 689
 690static void check_lifetime(struct work_struct *work)
 691{
 692	unsigned long now, next, next_sec, next_sched;
 693	struct in_ifaddr *ifa;
 694	struct hlist_node *n;
 695	int i;
 696
 697	now = jiffies;
 698	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 699
 700	for (i = 0; i < IN4_ADDR_HSIZE; i++) {
 701		bool change_needed = false;
 702
 703		rcu_read_lock();
 704		hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
 705			unsigned long age;
 706
 707			if (ifa->ifa_flags & IFA_F_PERMANENT)
 708				continue;
 709
 710			/* We try to batch several events at once. */
 711			age = (now - ifa->ifa_tstamp +
 712			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 713
 714			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 715			    age >= ifa->ifa_valid_lft) {
 716				change_needed = true;
 717			} else if (ifa->ifa_preferred_lft ==
 718				   INFINITY_LIFE_TIME) {
 719				continue;
 720			} else if (age >= ifa->ifa_preferred_lft) {
 721				if (time_before(ifa->ifa_tstamp +
 722						ifa->ifa_valid_lft * HZ, next))
 723					next = ifa->ifa_tstamp +
 724					       ifa->ifa_valid_lft * HZ;
 725
 726				if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
 727					change_needed = true;
 728			} else if (time_before(ifa->ifa_tstamp +
 729					       ifa->ifa_preferred_lft * HZ,
 730					       next)) {
 731				next = ifa->ifa_tstamp +
 732				       ifa->ifa_preferred_lft * HZ;
 733			}
 734		}
 735		rcu_read_unlock();
 736		if (!change_needed)
 737			continue;
 738		rtnl_lock();
 739		hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
 740			unsigned long age;
 741
 742			if (ifa->ifa_flags & IFA_F_PERMANENT)
 743				continue;
 744
 745			/* We try to batch several events at once. */
 746			age = (now - ifa->ifa_tstamp +
 747			       ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 748
 749			if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
 750			    age >= ifa->ifa_valid_lft) {
 751				struct in_ifaddr __rcu **ifap;
 752				struct in_ifaddr *tmp;
 753
 754				ifap = &ifa->ifa_dev->ifa_list;
 755				tmp = rtnl_dereference(*ifap);
 756				while (tmp) {
 757					if (tmp == ifa) {
 758						inet_del_ifa(ifa->ifa_dev,
 759							     ifap, 1);
 760						break;
 761					}
 762					ifap = &tmp->ifa_next;
 763					tmp = rtnl_dereference(*ifap);
 764				}
 765			} else if (ifa->ifa_preferred_lft !=
 766				   INFINITY_LIFE_TIME &&
 767				   age >= ifa->ifa_preferred_lft &&
 768				   !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
 769				ifa->ifa_flags |= IFA_F_DEPRECATED;
 770				rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
 771			}
 772		}
 773		rtnl_unlock();
 774	}
 775
 776	next_sec = round_jiffies_up(next);
 777	next_sched = next;
 778
 779	/* If rounded timeout is accurate enough, accept it. */
 780	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
 781		next_sched = next_sec;
 782
 783	now = jiffies;
 784	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
 785	if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
 786		next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
 787
 788	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
 789			next_sched - now);
 790}
 791
 792static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
 793			     __u32 prefered_lft)
 794{
 795	unsigned long timeout;
 796
 797	ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED);
 798
 799	timeout = addrconf_timeout_fixup(valid_lft, HZ);
 800	if (addrconf_finite_timeout(timeout))
 801		ifa->ifa_valid_lft = timeout;
 802	else
 803		ifa->ifa_flags |= IFA_F_PERMANENT;
 804
 805	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
 806	if (addrconf_finite_timeout(timeout)) {
 807		if (timeout == 0)
 808			ifa->ifa_flags |= IFA_F_DEPRECATED;
 809		ifa->ifa_preferred_lft = timeout;
 810	}
 811	ifa->ifa_tstamp = jiffies;
 812	if (!ifa->ifa_cstamp)
 813		ifa->ifa_cstamp = ifa->ifa_tstamp;
 814}
 815
 816static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 817				       __u32 *pvalid_lft, __u32 *pprefered_lft,
 818				       struct netlink_ext_ack *extack)
 819{
 820	struct nlattr *tb[IFA_MAX+1];
 821	struct in_ifaddr *ifa;
 822	struct ifaddrmsg *ifm;
 823	struct net_device *dev;
 824	struct in_device *in_dev;
 825	int err;
 826
 827	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
 828				     ifa_ipv4_policy, extack);
 829	if (err < 0)
 830		goto errout;
 831
 832	ifm = nlmsg_data(nlh);
 833	err = -EINVAL;
 834	if (ifm->ifa_prefixlen > 32 || !tb[IFA_LOCAL])
 835		goto errout;
 836
 837	dev = __dev_get_by_index(net, ifm->ifa_index);
 838	err = -ENODEV;
 839	if (!dev)
 840		goto errout;
 841
 842	in_dev = __in_dev_get_rtnl(dev);
 843	err = -ENOBUFS;
 844	if (!in_dev)
 845		goto errout;
 846
 847	ifa = inet_alloc_ifa();
 848	if (!ifa)
 849		/*
 850		 * A potential indev allocation can be left alive, it stays
 851		 * assigned to its device and is destroy with it.
 852		 */
 853		goto errout;
 854
 855	ipv4_devconf_setall(in_dev);
 856	neigh_parms_data_state_setall(in_dev->arp_parms);
 857	in_dev_hold(in_dev);
 858
 859	if (!tb[IFA_ADDRESS])
 860		tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 861
 862	INIT_HLIST_NODE(&ifa->hash);
 863	ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 864	ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 865	ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
 866					 ifm->ifa_flags;
 867	ifa->ifa_scope = ifm->ifa_scope;
 868	ifa->ifa_dev = in_dev;
 869
 870	ifa->ifa_local = nla_get_in_addr(tb[IFA_LOCAL]);
 871	ifa->ifa_address = nla_get_in_addr(tb[IFA_ADDRESS]);
 872
 873	if (tb[IFA_BROADCAST])
 874		ifa->ifa_broadcast = nla_get_in_addr(tb[IFA_BROADCAST]);
 875
 876	if (tb[IFA_LABEL])
 877		nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 878	else
 879		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 880
 881	if (tb[IFA_RT_PRIORITY])
 882		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
 883
 884	if (tb[IFA_CACHEINFO]) {
 885		struct ifa_cacheinfo *ci;
 886
 887		ci = nla_data(tb[IFA_CACHEINFO]);
 888		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 889			err = -EINVAL;
 890			goto errout_free;
 891		}
 892		*pvalid_lft = ci->ifa_valid;
 893		*pprefered_lft = ci->ifa_prefered;
 894	}
 895
 896	return ifa;
 897
 898errout_free:
 899	inet_free_ifa(ifa);
 900errout:
 901	return ERR_PTR(err);
 902}
 903
 904static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
 905{
 906	struct in_device *in_dev = ifa->ifa_dev;
 907	struct in_ifaddr *ifa1;
 908
 909	if (!ifa->ifa_local)
 910		return NULL;
 911
 912	in_dev_for_each_ifa_rtnl(ifa1, in_dev) {
 913		if (ifa1->ifa_mask == ifa->ifa_mask &&
 914		    inet_ifa_match(ifa1->ifa_address, ifa) &&
 915		    ifa1->ifa_local == ifa->ifa_local)
 916			return ifa1;
 917	}
 918	return NULL;
 919}
 920
 921static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 922			    struct netlink_ext_ack *extack)
 923{
 924	struct net *net = sock_net(skb->sk);
 925	struct in_ifaddr *ifa;
 926	struct in_ifaddr *ifa_existing;
 927	__u32 valid_lft = INFINITY_LIFE_TIME;
 928	__u32 prefered_lft = INFINITY_LIFE_TIME;
 929
 930	ASSERT_RTNL();
 931
 932	ifa = rtm_to_ifaddr(net, nlh, &valid_lft, &prefered_lft, extack);
 933	if (IS_ERR(ifa))
 934		return PTR_ERR(ifa);
 935
 936	ifa_existing = find_matching_ifa(ifa);
 937	if (!ifa_existing) {
 938		/* It would be best to check for !NLM_F_CREATE here but
 939		 * userspace already relies on not having to provide this.
 940		 */
 941		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 942		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
 943			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
 944					       true, ifa);
 945
 946			if (ret < 0) {
 947				inet_free_ifa(ifa);
 948				return ret;
 949			}
 950		}
 951		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
 952					 extack);
 953	} else {
 954		u32 new_metric = ifa->ifa_rt_priority;
 955
 956		inet_free_ifa(ifa);
 957
 958		if (nlh->nlmsg_flags & NLM_F_EXCL ||
 959		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 960			return -EEXIST;
 961		ifa = ifa_existing;
 962
 963		if (ifa->ifa_rt_priority != new_metric) {
 964			fib_modify_prefix_metric(ifa, new_metric);
 965			ifa->ifa_rt_priority = new_metric;
 966		}
 967
 968		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 969		cancel_delayed_work(&check_lifetime_work);
 970		queue_delayed_work(system_power_efficient_wq,
 971				&check_lifetime_work, 0);
 972		rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
 973	}
 974	return 0;
 975}
 976
 977/*
 978 *	Determine a default network mask, based on the IP address.
 979 */
 980
 981static int inet_abc_len(__be32 addr)
 982{
 983	int rc = -1;	/* Something else, probably a multicast. */
 984
 985	if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
 986		rc = 0;
 987	else {
 988		__u32 haddr = ntohl(addr);
 989		if (IN_CLASSA(haddr))
 990			rc = 8;
 991		else if (IN_CLASSB(haddr))
 992			rc = 16;
 993		else if (IN_CLASSC(haddr))
 994			rc = 24;
 995		else if (IN_CLASSE(haddr))
 996			rc = 32;
 997	}
 998
 999	return rc;
1000}
1001
1002
1003int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
1004{
1005	struct sockaddr_in sin_orig;
1006	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
1007	struct in_ifaddr __rcu **ifap = NULL;
1008	struct in_device *in_dev;
1009	struct in_ifaddr *ifa = NULL;
1010	struct net_device *dev;
1011	char *colon;
1012	int ret = -EFAULT;
1013	int tryaddrmatch = 0;
1014
1015	ifr->ifr_name[IFNAMSIZ - 1] = 0;
1016
1017	/* save original address for comparison */
1018	memcpy(&sin_orig, sin, sizeof(*sin));
1019
1020	colon = strchr(ifr->ifr_name, ':');
1021	if (colon)
1022		*colon = 0;
1023
1024	dev_load(net, ifr->ifr_name);
1025
1026	switch (cmd) {
1027	case SIOCGIFADDR:	/* Get interface address */
1028	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1029	case SIOCGIFDSTADDR:	/* Get the destination address */
1030	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1031		/* Note that these ioctls will not sleep,
1032		   so that we do not impose a lock.
1033		   One day we will be forced to put shlock here (I mean SMP)
1034		 */
1035		tryaddrmatch = (sin_orig.sin_family == AF_INET);
1036		memset(sin, 0, sizeof(*sin));
1037		sin->sin_family = AF_INET;
1038		break;
1039
1040	case SIOCSIFFLAGS:
1041		ret = -EPERM;
1042		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1043			goto out;
1044		break;
1045	case SIOCSIFADDR:	/* Set interface address (and family) */
1046	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1047	case SIOCSIFDSTADDR:	/* Set the destination address */
1048	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1049		ret = -EPERM;
1050		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1051			goto out;
1052		ret = -EINVAL;
1053		if (sin->sin_family != AF_INET)
1054			goto out;
1055		break;
1056	default:
1057		ret = -EINVAL;
1058		goto out;
1059	}
1060
1061	rtnl_lock();
1062
1063	ret = -ENODEV;
1064	dev = __dev_get_by_name(net, ifr->ifr_name);
1065	if (!dev)
1066		goto done;
1067
1068	if (colon)
1069		*colon = ':';
1070
1071	in_dev = __in_dev_get_rtnl(dev);
1072	if (in_dev) {
1073		if (tryaddrmatch) {
1074			/* Matthias Andree */
1075			/* compare label and address (4.4BSD style) */
1076			/* note: we only do this for a limited set of ioctls
1077			   and only if the original address family was AF_INET.
1078			   This is checked above. */
1079
1080			for (ifap = &in_dev->ifa_list;
1081			     (ifa = rtnl_dereference(*ifap)) != NULL;
1082			     ifap = &ifa->ifa_next) {
1083				if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
1084				    sin_orig.sin_addr.s_addr ==
1085							ifa->ifa_local) {
1086					break; /* found */
1087				}
1088			}
1089		}
1090		/* we didn't get a match, maybe the application is
1091		   4.3BSD-style and passed in junk so we fall back to
1092		   comparing just the label */
1093		if (!ifa) {
1094			for (ifap = &in_dev->ifa_list;
1095			     (ifa = rtnl_dereference(*ifap)) != NULL;
1096			     ifap = &ifa->ifa_next)
1097				if (!strcmp(ifr->ifr_name, ifa->ifa_label))
1098					break;
1099		}
1100	}
1101
1102	ret = -EADDRNOTAVAIL;
1103	if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
1104		goto done;
1105
1106	switch (cmd) {
1107	case SIOCGIFADDR:	/* Get interface address */
1108		ret = 0;
1109		sin->sin_addr.s_addr = ifa->ifa_local;
1110		break;
1111
1112	case SIOCGIFBRDADDR:	/* Get the broadcast address */
1113		ret = 0;
1114		sin->sin_addr.s_addr = ifa->ifa_broadcast;
1115		break;
1116
1117	case SIOCGIFDSTADDR:	/* Get the destination address */
1118		ret = 0;
1119		sin->sin_addr.s_addr = ifa->ifa_address;
1120		break;
1121
1122	case SIOCGIFNETMASK:	/* Get the netmask for the interface */
1123		ret = 0;
1124		sin->sin_addr.s_addr = ifa->ifa_mask;
1125		break;
1126
1127	case SIOCSIFFLAGS:
1128		if (colon) {
1129			ret = -EADDRNOTAVAIL;
1130			if (!ifa)
1131				break;
1132			ret = 0;
1133			if (!(ifr->ifr_flags & IFF_UP))
1134				inet_del_ifa(in_dev, ifap, 1);
1135			break;
1136		}
1137		ret = dev_change_flags(dev, ifr->ifr_flags, NULL);
1138		break;
1139
1140	case SIOCSIFADDR:	/* Set interface address (and family) */
1141		ret = -EINVAL;
1142		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1143			break;
1144
1145		if (!ifa) {
1146			ret = -ENOBUFS;
1147			ifa = inet_alloc_ifa();
1148			if (!ifa)
1149				break;
1150			INIT_HLIST_NODE(&ifa->hash);
1151			if (colon)
1152				memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
1153			else
1154				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1155		} else {
1156			ret = 0;
1157			if (ifa->ifa_local == sin->sin_addr.s_addr)
1158				break;
1159			inet_del_ifa(in_dev, ifap, 0);
1160			ifa->ifa_broadcast = 0;
1161			ifa->ifa_scope = 0;
1162		}
1163
1164		ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
1165
1166		if (!(dev->flags & IFF_POINTOPOINT)) {
1167			ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
1168			ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
1169			if ((dev->flags & IFF_BROADCAST) &&
1170			    ifa->ifa_prefixlen < 31)
1171				ifa->ifa_broadcast = ifa->ifa_address |
1172						     ~ifa->ifa_mask;
1173		} else {
1174			ifa->ifa_prefixlen = 32;
1175			ifa->ifa_mask = inet_make_mask(32);
1176		}
1177		set_ifa_lifetime(ifa, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
1178		ret = inet_set_ifa(dev, ifa);
1179		break;
1180
1181	case SIOCSIFBRDADDR:	/* Set the broadcast address */
1182		ret = 0;
1183		if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
1184			inet_del_ifa(in_dev, ifap, 0);
1185			ifa->ifa_broadcast = sin->sin_addr.s_addr;
1186			inet_insert_ifa(ifa);
1187		}
1188		break;
1189
1190	case SIOCSIFDSTADDR:	/* Set the destination address */
1191		ret = 0;
1192		if (ifa->ifa_address == sin->sin_addr.s_addr)
1193			break;
1194		ret = -EINVAL;
1195		if (inet_abc_len(sin->sin_addr.s_addr) < 0)
1196			break;
1197		ret = 0;
1198		inet_del_ifa(in_dev, ifap, 0);
1199		ifa->ifa_address = sin->sin_addr.s_addr;
1200		inet_insert_ifa(ifa);
1201		break;
1202
1203	case SIOCSIFNETMASK: 	/* Set the netmask for the interface */
1204
1205		/*
1206		 *	The mask we set must be legal.
1207		 */
1208		ret = -EINVAL;
1209		if (bad_mask(sin->sin_addr.s_addr, 0))
1210			break;
1211		ret = 0;
1212		if (ifa->ifa_mask != sin->sin_addr.s_addr) {
1213			__be32 old_mask = ifa->ifa_mask;
1214			inet_del_ifa(in_dev, ifap, 0);
1215			ifa->ifa_mask = sin->sin_addr.s_addr;
1216			ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
1217
1218			/* See if current broadcast address matches
1219			 * with current netmask, then recalculate
1220			 * the broadcast address. Otherwise it's a
1221			 * funny address, so don't touch it since
1222			 * the user seems to know what (s)he's doing...
1223			 */
1224			if ((dev->flags & IFF_BROADCAST) &&
1225			    (ifa->ifa_prefixlen < 31) &&
1226			    (ifa->ifa_broadcast ==
1227			     (ifa->ifa_local|~old_mask))) {
1228				ifa->ifa_broadcast = (ifa->ifa_local |
1229						      ~sin->sin_addr.s_addr);
1230			}
1231			inet_insert_ifa(ifa);
1232		}
1233		break;
1234	}
1235done:
1236	rtnl_unlock();
1237out:
1238	return ret;
1239}
1240
1241static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
1242{
1243	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1244	const struct in_ifaddr *ifa;
1245	struct ifreq ifr;
1246	int done = 0;
1247
1248	if (WARN_ON(size > sizeof(struct ifreq)))
1249		goto out;
1250
1251	if (!in_dev)
1252		goto out;
1253
1254	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1255		if (!buf) {
1256			done += size;
1257			continue;
1258		}
1259		if (len < size)
1260			break;
1261		memset(&ifr, 0, sizeof(struct ifreq));
1262		strcpy(ifr.ifr_name, ifa->ifa_label);
1263
1264		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
1265		(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
1266								ifa->ifa_local;
1267
1268		if (copy_to_user(buf + done, &ifr, size)) {
1269			done = -EFAULT;
1270			break;
1271		}
1272		len  -= size;
1273		done += size;
1274	}
1275out:
1276	return done;
1277}
1278
1279static __be32 in_dev_select_addr(const struct in_device *in_dev,
1280				 int scope)
1281{
1282	const struct in_ifaddr *ifa;
1283
1284	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1285		if (ifa->ifa_flags & IFA_F_SECONDARY)
1286			continue;
1287		if (ifa->ifa_scope != RT_SCOPE_LINK &&
1288		    ifa->ifa_scope <= scope)
1289			return ifa->ifa_local;
1290	}
1291
1292	return 0;
1293}
1294
1295__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
1296{
1297	const struct in_ifaddr *ifa;
1298	__be32 addr = 0;
1299	unsigned char localnet_scope = RT_SCOPE_HOST;
1300	struct in_device *in_dev;
1301	struct net *net = dev_net(dev);
1302	int master_idx;
1303
1304	rcu_read_lock();
1305	in_dev = __in_dev_get_rcu(dev);
1306	if (!in_dev)
1307		goto no_in_dev;
1308
1309	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1310		localnet_scope = RT_SCOPE_LINK;
1311
1312	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1313		if (ifa->ifa_flags & IFA_F_SECONDARY)
1314			continue;
1315		if (min(ifa->ifa_scope, localnet_scope) > scope)
1316			continue;
1317		if (!dst || inet_ifa_match(dst, ifa)) {
1318			addr = ifa->ifa_local;
1319			break;
1320		}
1321		if (!addr)
1322			addr = ifa->ifa_local;
1323	}
1324
1325	if (addr)
1326		goto out_unlock;
1327no_in_dev:
1328	master_idx = l3mdev_master_ifindex_rcu(dev);
1329
1330	/* For VRFs, the VRF device takes the place of the loopback device,
1331	 * with addresses on it being preferred.  Note in such cases the
1332	 * loopback device will be among the devices that fail the master_idx
1333	 * equality check in the loop below.
1334	 */
1335	if (master_idx &&
1336	    (dev = dev_get_by_index_rcu(net, master_idx)) &&
1337	    (in_dev = __in_dev_get_rcu(dev))) {
1338		addr = in_dev_select_addr(in_dev, scope);
1339		if (addr)
1340			goto out_unlock;
1341	}
1342
1343	/* Not loopback addresses on loopback should be preferred
1344	   in this case. It is important that lo is the first interface
1345	   in dev_base list.
1346	 */
1347	for_each_netdev_rcu(net, dev) {
1348		if (l3mdev_master_ifindex_rcu(dev) != master_idx)
1349			continue;
1350
1351		in_dev = __in_dev_get_rcu(dev);
1352		if (!in_dev)
1353			continue;
1354
1355		addr = in_dev_select_addr(in_dev, scope);
1356		if (addr)
1357			goto out_unlock;
1358	}
1359out_unlock:
1360	rcu_read_unlock();
1361	return addr;
1362}
1363EXPORT_SYMBOL(inet_select_addr);
1364
1365static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1366			      __be32 local, int scope)
1367{
1368	unsigned char localnet_scope = RT_SCOPE_HOST;
1369	const struct in_ifaddr *ifa;
1370	__be32 addr = 0;
1371	int same = 0;
1372
1373	if (unlikely(IN_DEV_ROUTE_LOCALNET(in_dev)))
1374		localnet_scope = RT_SCOPE_LINK;
1375
1376	in_dev_for_each_ifa_rcu(ifa, in_dev) {
1377		unsigned char min_scope = min(ifa->ifa_scope, localnet_scope);
1378
1379		if (!addr &&
1380		    (local == ifa->ifa_local || !local) &&
1381		    min_scope <= scope) {
1382			addr = ifa->ifa_local;
1383			if (same)
1384				break;
1385		}
1386		if (!same) {
1387			same = (!local || inet_ifa_match(local, ifa)) &&
1388				(!dst || inet_ifa_match(dst, ifa));
1389			if (same && addr) {
1390				if (local || !dst)
1391					break;
1392				/* Is the selected addr into dst subnet? */
1393				if (inet_ifa_match(addr, ifa))
1394					break;
1395				/* No, then can we use new local src? */
1396				if (min_scope <= scope) {
1397					addr = ifa->ifa_local;
1398					break;
1399				}
1400				/* search for large dst subnet for addr */
1401				same = 0;
1402			}
1403		}
1404	}
1405
1406	return same ? addr : 0;
1407}
1408
1409/*
1410 * Confirm that local IP address exists using wildcards:
1411 * - net: netns to check, cannot be NULL
1412 * - in_dev: only on this interface, NULL=any interface
1413 * - dst: only in the same subnet as dst, 0=any dst
1414 * - local: address, 0=autoselect the local address
1415 * - scope: maximum allowed scope value for the local address
1416 */
1417__be32 inet_confirm_addr(struct net *net, struct in_device *in_dev,
1418			 __be32 dst, __be32 local, int scope)
1419{
1420	__be32 addr = 0;
1421	struct net_device *dev;
1422
1423	if (in_dev)
1424		return confirm_addr_indev(in_dev, dst, local, scope);
1425
1426	rcu_read_lock();
1427	for_each_netdev_rcu(net, dev) {
1428		in_dev = __in_dev_get_rcu(dev);
1429		if (in_dev) {
1430			addr = confirm_addr_indev(in_dev, dst, local, scope);
1431			if (addr)
1432				break;
1433		}
1434	}
1435	rcu_read_unlock();
1436
1437	return addr;
1438}
1439EXPORT_SYMBOL(inet_confirm_addr);
1440
1441/*
1442 *	Device notifier
1443 */
1444
1445int register_inetaddr_notifier(struct notifier_block *nb)
1446{
1447	return blocking_notifier_chain_register(&inetaddr_chain, nb);
1448}
1449EXPORT_SYMBOL(register_inetaddr_notifier);
1450
1451int unregister_inetaddr_notifier(struct notifier_block *nb)
1452{
1453	return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1454}
1455EXPORT_SYMBOL(unregister_inetaddr_notifier);
1456
1457int register_inetaddr_validator_notifier(struct notifier_block *nb)
1458{
1459	return blocking_notifier_chain_register(&inetaddr_validator_chain, nb);
1460}
1461EXPORT_SYMBOL(register_inetaddr_validator_notifier);
1462
1463int unregister_inetaddr_validator_notifier(struct notifier_block *nb)
1464{
1465	return blocking_notifier_chain_unregister(&inetaddr_validator_chain,
1466	    nb);
1467}
1468EXPORT_SYMBOL(unregister_inetaddr_validator_notifier);
1469
1470/* Rename ifa_labels for a device name change. Make some effort to preserve
1471 * existing alias numbering and to create unique labels if possible.
1472*/
1473static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1474{
1475	struct in_ifaddr *ifa;
1476	int named = 0;
1477
1478	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1479		char old[IFNAMSIZ], *dot;
1480
1481		memcpy(old, ifa->ifa_label, IFNAMSIZ);
1482		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1483		if (named++ == 0)
1484			goto skip;
1485		dot = strchr(old, ':');
1486		if (!dot) {
1487			sprintf(old, ":%d", named);
1488			dot = old;
1489		}
1490		if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1491			strcat(ifa->ifa_label, dot);
1492		else
1493			strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1494skip:
1495		rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1496	}
1497}
1498
1499static bool inetdev_valid_mtu(unsigned int mtu)
1500{
1501	return mtu >= IPV4_MIN_MTU;
1502}
1503
1504static void inetdev_send_gratuitous_arp(struct net_device *dev,
1505					struct in_device *in_dev)
1506
1507{
1508	const struct in_ifaddr *ifa;
1509
1510	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1511		arp_send(ARPOP_REQUEST, ETH_P_ARP,
1512			 ifa->ifa_local, dev,
1513			 ifa->ifa_local, NULL,
1514			 dev->dev_addr, NULL);
1515	}
1516}
1517
1518/* Called only under RTNL semaphore */
1519
1520static int inetdev_event(struct notifier_block *this, unsigned long event,
1521			 void *ptr)
1522{
1523	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1524	struct in_device *in_dev = __in_dev_get_rtnl(dev);
1525
1526	ASSERT_RTNL();
1527
1528	if (!in_dev) {
1529		if (event == NETDEV_REGISTER) {
1530			in_dev = inetdev_init(dev);
1531			if (IS_ERR(in_dev))
1532				return notifier_from_errno(PTR_ERR(in_dev));
1533			if (dev->flags & IFF_LOOPBACK) {
1534				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1535				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1536			}
1537		} else if (event == NETDEV_CHANGEMTU) {
1538			/* Re-enabling IP */
1539			if (inetdev_valid_mtu(dev->mtu))
1540				in_dev = inetdev_init(dev);
1541		}
1542		goto out;
1543	}
1544
1545	switch (event) {
1546	case NETDEV_REGISTER:
1547		pr_debug("%s: bug\n", __func__);
1548		RCU_INIT_POINTER(dev->ip_ptr, NULL);
1549		break;
1550	case NETDEV_UP:
1551		if (!inetdev_valid_mtu(dev->mtu))
1552			break;
1553		if (dev->flags & IFF_LOOPBACK) {
1554			struct in_ifaddr *ifa = inet_alloc_ifa();
1555
1556			if (ifa) {
1557				INIT_HLIST_NODE(&ifa->hash);
1558				ifa->ifa_local =
1559				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1560				ifa->ifa_prefixlen = 8;
1561				ifa->ifa_mask = inet_make_mask(8);
1562				in_dev_hold(in_dev);
1563				ifa->ifa_dev = in_dev;
1564				ifa->ifa_scope = RT_SCOPE_HOST;
1565				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1566				set_ifa_lifetime(ifa, INFINITY_LIFE_TIME,
1567						 INFINITY_LIFE_TIME);
1568				ipv4_devconf_setall(in_dev);
1569				neigh_parms_data_state_setall(in_dev->arp_parms);
1570				inet_insert_ifa(ifa);
1571			}
1572		}
1573		ip_mc_up(in_dev);
1574		/* fall through */
1575	case NETDEV_CHANGEADDR:
1576		if (!IN_DEV_ARP_NOTIFY(in_dev))
1577			break;
1578		/* fall through */
1579	case NETDEV_NOTIFY_PEERS:
1580		/* Send gratuitous ARP to notify of link change */
1581		inetdev_send_gratuitous_arp(dev, in_dev);
1582		break;
1583	case NETDEV_DOWN:
1584		ip_mc_down(in_dev);
1585		break;
1586	case NETDEV_PRE_TYPE_CHANGE:
1587		ip_mc_unmap(in_dev);
1588		break;
1589	case NETDEV_POST_TYPE_CHANGE:
1590		ip_mc_remap(in_dev);
1591		break;
1592	case NETDEV_CHANGEMTU:
1593		if (inetdev_valid_mtu(dev->mtu))
1594			break;
1595		/* disable IP when MTU is not enough */
1596		/* fall through */
1597	case NETDEV_UNREGISTER:
1598		inetdev_destroy(in_dev);
1599		break;
1600	case NETDEV_CHANGENAME:
1601		/* Do not notify about label change, this event is
1602		 * not interesting to applications using netlink.
1603		 */
1604		inetdev_changename(dev, in_dev);
1605
1606		devinet_sysctl_unregister(in_dev);
1607		devinet_sysctl_register(in_dev);
1608		break;
1609	}
1610out:
1611	return NOTIFY_DONE;
1612}
1613
1614static struct notifier_block ip_netdev_notifier = {
1615	.notifier_call = inetdev_event,
1616};
1617
1618static size_t inet_nlmsg_size(void)
1619{
1620	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1621	       + nla_total_size(4) /* IFA_ADDRESS */
1622	       + nla_total_size(4) /* IFA_LOCAL */
1623	       + nla_total_size(4) /* IFA_BROADCAST */
1624	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
1625	       + nla_total_size(4)  /* IFA_FLAGS */
1626	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
1627	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
1628}
1629
1630static inline u32 cstamp_delta(unsigned long cstamp)
1631{
1632	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
1633}
1634
1635static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp,
1636			 unsigned long tstamp, u32 preferred, u32 valid)
1637{
1638	struct ifa_cacheinfo ci;
1639
1640	ci.cstamp = cstamp_delta(cstamp);
1641	ci.tstamp = cstamp_delta(tstamp);
1642	ci.ifa_prefered = preferred;
1643	ci.ifa_valid = valid;
1644
1645	return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci);
1646}
1647
1648static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1649			    struct inet_fill_args *args)
1650{
1651	struct ifaddrmsg *ifm;
1652	struct nlmsghdr  *nlh;
1653	u32 preferred, valid;
1654
1655	nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm),
1656			args->flags);
1657	if (!nlh)
1658		return -EMSGSIZE;
1659
1660	ifm = nlmsg_data(nlh);
1661	ifm->ifa_family = AF_INET;
1662	ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1663	ifm->ifa_flags = ifa->ifa_flags;
1664	ifm->ifa_scope = ifa->ifa_scope;
1665	ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1666
1667	if (args->netnsid >= 0 &&
1668	    nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid))
1669		goto nla_put_failure;
1670
1671	if (!(ifm->ifa_flags & IFA_F_PERMANENT)) {
1672		preferred = ifa->ifa_preferred_lft;
1673		valid = ifa->ifa_valid_lft;
1674		if (preferred != INFINITY_LIFE_TIME) {
1675			long tval = (jiffies - ifa->ifa_tstamp) / HZ;
1676
1677			if (preferred > tval)
1678				preferred -= tval;
1679			else
1680				preferred = 0;
1681			if (valid != INFINITY_LIFE_TIME) {
1682				if (valid > tval)
1683					valid -= tval;
1684				else
1685					valid = 0;
1686			}
1687		}
1688	} else {
1689		preferred = INFINITY_LIFE_TIME;
1690		valid = INFINITY_LIFE_TIME;
1691	}
1692	if ((ifa->ifa_address &&
1693	     nla_put_in_addr(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1694	    (ifa->ifa_local &&
1695	     nla_put_in_addr(skb, IFA_LOCAL, ifa->ifa_local)) ||
1696	    (ifa->ifa_broadcast &&
1697	     nla_put_in_addr(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1698	    (ifa->ifa_label[0] &&
1699	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
1700	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
1701	    (ifa->ifa_rt_priority &&
1702	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
1703	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
1704			  preferred, valid))
1705		goto nla_put_failure;
1706
1707	nlmsg_end(skb, nlh);
1708	return 0;
1709
1710nla_put_failure:
1711	nlmsg_cancel(skb, nlh);
1712	return -EMSGSIZE;
1713}
1714
1715static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
1716				      struct inet_fill_args *fillargs,
1717				      struct net **tgt_net, struct sock *sk,
1718				      struct netlink_callback *cb)
1719{
1720	struct netlink_ext_ack *extack = cb->extack;
1721	struct nlattr *tb[IFA_MAX+1];
1722	struct ifaddrmsg *ifm;
1723	int err, i;
1724
1725	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
1726		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
1727		return -EINVAL;
1728	}
1729
1730	ifm = nlmsg_data(nlh);
1731	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
1732		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
1733		return -EINVAL;
1734	}
1735
1736	fillargs->ifindex = ifm->ifa_index;
1737	if (fillargs->ifindex) {
1738		cb->answer_flags |= NLM_F_DUMP_FILTERED;
1739		fillargs->flags |= NLM_F_DUMP_FILTERED;
1740	}
1741
1742	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
1743					    ifa_ipv4_policy, extack);
1744	if (err < 0)
1745		return err;
1746
1747	for (i = 0; i <= IFA_MAX; ++i) {
1748		if (!tb[i])
1749			continue;
1750
1751		if (i == IFA_TARGET_NETNSID) {
1752			struct net *net;
1753
1754			fillargs->netnsid = nla_get_s32(tb[i]);
1755
1756			net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
1757			if (IS_ERR(net)) {
1758				fillargs->netnsid = -1;
1759				NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
1760				return PTR_ERR(net);
1761			}
1762			*tgt_net = net;
1763		} else {
1764			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in dump request");
1765			return -EINVAL;
1766		}
1767	}
1768
1769	return 0;
1770}
1771
1772static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb,
1773			    struct netlink_callback *cb, int s_ip_idx,
1774			    struct inet_fill_args *fillargs)
1775{
1776	struct in_ifaddr *ifa;
1777	int ip_idx = 0;
1778	int err;
1779
1780	in_dev_for_each_ifa_rtnl(ifa, in_dev) {
1781		if (ip_idx < s_ip_idx) {
1782			ip_idx++;
1783			continue;
1784		}
1785		err = inet_fill_ifaddr(skb, ifa, fillargs);
1786		if (err < 0)
1787			goto done;
1788
1789		nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1790		ip_idx++;
1791	}
1792	err = 0;
1793
1794done:
1795	cb->args[2] = ip_idx;
1796
1797	return err;
1798}
1799
1800static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1801{
1802	const struct nlmsghdr *nlh = cb->nlh;
1803	struct inet_fill_args fillargs = {
1804		.portid = NETLINK_CB(cb->skb).portid,
1805		.seq = nlh->nlmsg_seq,
1806		.event = RTM_NEWADDR,
1807		.flags = NLM_F_MULTI,
1808		.netnsid = -1,
1809	};
1810	struct net *net = sock_net(skb->sk);
1811	struct net *tgt_net = net;
1812	int h, s_h;
1813	int idx, s_idx;
1814	int s_ip_idx;
1815	struct net_device *dev;
1816	struct in_device *in_dev;
1817	struct hlist_head *head;
1818	int err = 0;
1819
1820	s_h = cb->args[0];
1821	s_idx = idx = cb->args[1];
1822	s_ip_idx = cb->args[2];
1823
1824	if (cb->strict_check) {
1825		err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
1826						 skb->sk, cb);
1827		if (err < 0)
1828			goto put_tgt_net;
1829
1830		err = 0;
1831		if (fillargs.ifindex) {
1832			dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
1833			if (!dev) {
1834				err = -ENODEV;
1835				goto put_tgt_net;
1836			}
1837
1838			in_dev = __in_dev_get_rtnl(dev);
1839			if (in_dev) {
1840				err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1841						       &fillargs);
1842			}
1843			goto put_tgt_net;
1844		}
1845	}
1846
1847	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1848		idx = 0;
1849		head = &tgt_net->dev_index_head[h];
1850		rcu_read_lock();
1851		cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
1852			  tgt_net->dev_base_seq;
1853		hlist_for_each_entry_rcu(dev, head, index_hlist) {
1854			if (idx < s_idx)
1855				goto cont;
1856			if (h > s_h || idx > s_idx)
1857				s_ip_idx = 0;
1858			in_dev = __in_dev_get_rcu(dev);
1859			if (!in_dev)
1860				goto cont;
1861
1862			err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx,
1863					       &fillargs);
1864			if (err < 0) {
1865				rcu_read_unlock();
1866				goto done;
1867			}
1868cont:
1869			idx++;
1870		}
1871		rcu_read_unlock();
1872	}
1873
1874done:
1875	cb->args[0] = h;
1876	cb->args[1] = idx;
1877put_tgt_net:
1878	if (fillargs.netnsid >= 0)
1879		put_net(tgt_net);
1880
1881	return skb->len ? : err;
1882}
1883
1884static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1885		      u32 portid)
1886{
1887	struct inet_fill_args fillargs = {
1888		.portid = portid,
1889		.seq = nlh ? nlh->nlmsg_seq : 0,
1890		.event = event,
1891		.flags = 0,
1892		.netnsid = -1,
1893	};
1894	struct sk_buff *skb;
1895	int err = -ENOBUFS;
1896	struct net *net;
1897
1898	net = dev_net(ifa->ifa_dev->dev);
1899	skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1900	if (!skb)
1901		goto errout;
1902
1903	err = inet_fill_ifaddr(skb, ifa, &fillargs);
1904	if (err < 0) {
1905		/* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1906		WARN_ON(err == -EMSGSIZE);
1907		kfree_skb(skb);
1908		goto errout;
1909	}
1910	rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1911	return;
1912errout:
1913	if (err < 0)
1914		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1915}
1916
1917static size_t inet_get_link_af_size(const struct net_device *dev,
1918				    u32 ext_filter_mask)
1919{
1920	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1921
1922	if (!in_dev)
1923		return 0;
1924
1925	return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1926}
1927
1928static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
1929			     u32 ext_filter_mask)
1930{
1931	struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1932	struct nlattr *nla;
1933	int i;
1934
1935	if (!in_dev)
1936		return -ENODATA;
1937
1938	nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1939	if (!nla)
1940		return -EMSGSIZE;
1941
1942	for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1943		((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1944
1945	return 0;
1946}
1947
1948static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1949	[IFLA_INET_CONF]	= { .type = NLA_NESTED },
1950};
1951
1952static int inet_validate_link_af(const struct net_device *dev,
1953				 const struct nlattr *nla)
1954{
1955	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1956	int err, rem;
1957
1958	if (dev && !__in_dev_get_rcu(dev))
1959		return -EAFNOSUPPORT;
1960
1961	err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla,
1962					  inet_af_policy, NULL);
1963	if (err < 0)
1964		return err;
1965
1966	if (tb[IFLA_INET_CONF]) {
1967		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1968			int cfgid = nla_type(a);
1969
1970			if (nla_len(a) < 4)
1971				return -EINVAL;
1972
1973			if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1974				return -EINVAL;
1975		}
1976	}
1977
1978	return 0;
1979}
1980
1981static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1982{
1983	struct in_device *in_dev = __in_dev_get_rcu(dev);
1984	struct nlattr *a, *tb[IFLA_INET_MAX+1];
1985	int rem;
1986
1987	if (!in_dev)
1988		return -EAFNOSUPPORT;
1989
1990	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
1991		BUG();
1992
1993	if (tb[IFLA_INET_CONF]) {
1994		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1995			ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1996	}
1997
1998	return 0;
1999}
2000
2001static int inet_netconf_msgsize_devconf(int type)
2002{
2003	int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
2004		   + nla_total_size(4);	/* NETCONFA_IFINDEX */
2005	bool all = false;
2006
2007	if (type == NETCONFA_ALL)
2008		all = true;
2009
2010	if (all || type == NETCONFA_FORWARDING)
2011		size += nla_total_size(4);
2012	if (all || type == NETCONFA_RP_FILTER)
2013		size += nla_total_size(4);
2014	if (all || type == NETCONFA_MC_FORWARDING)
2015		size += nla_total_size(4);
2016	if (all || type == NETCONFA_BC_FORWARDING)
2017		size += nla_total_size(4);
2018	if (all || type == NETCONFA_PROXY_NEIGH)
2019		size += nla_total_size(4);
2020	if (all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
2021		size += nla_total_size(4);
2022
2023	return size;
2024}
2025
2026static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
2027				     struct ipv4_devconf *devconf, u32 portid,
2028				     u32 seq, int event, unsigned int flags,
2029				     int type)
2030{
2031	struct nlmsghdr  *nlh;
2032	struct netconfmsg *ncm;
2033	bool all = false;
2034
2035	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
2036			flags);
2037	if (!nlh)
2038		return -EMSGSIZE;
2039
2040	if (type == NETCONFA_ALL)
2041		all = true;
2042
2043	ncm = nlmsg_data(nlh);
2044	ncm->ncm_family = AF_INET;
2045
2046	if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0)
2047		goto nla_put_failure;
2048
2049	if (!devconf)
2050		goto out;
2051
2052	if ((all || type == NETCONFA_FORWARDING) &&
2053	    nla_put_s32(skb, NETCONFA_FORWARDING,
2054			IPV4_DEVCONF(*devconf, FORWARDING)) < 0)
2055		goto nla_put_failure;
2056	if ((all || type == NETCONFA_RP_FILTER) &&
2057	    nla_put_s32(skb, NETCONFA_RP_FILTER,
2058			IPV4_DEVCONF(*devconf, RP_FILTER)) < 0)
2059		goto nla_put_failure;
2060	if ((all || type == NETCONFA_MC_FORWARDING) &&
2061	    nla_put_s32(skb, NETCONFA_MC_FORWARDING,
2062			IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0)
2063		goto nla_put_failure;
2064	if ((all || type == NETCONFA_BC_FORWARDING) &&
2065	    nla_put_s32(skb, NETCONFA_BC_FORWARDING,
2066			IPV4_DEVCONF(*devconf, BC_FORWARDING)) < 0)
2067		goto nla_put_failure;
2068	if ((all || type == NETCONFA_PROXY_NEIGH) &&
2069	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
2070			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
2071		goto nla_put_failure;
2072	if ((all || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
2073	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2074			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
2075		goto nla_put_failure;
2076
2077out:
2078	nlmsg_end(skb, nlh);
2079	return 0;
2080
2081nla_put_failure:
2082	nlmsg_cancel(skb, nlh);
2083	return -EMSGSIZE;
2084}
2085
2086void inet_netconf_notify_devconf(struct net *net, int event, int type,
2087				 int ifindex, struct ipv4_devconf *devconf)
2088{
2089	struct sk_buff *skb;
2090	int err = -ENOBUFS;
2091
2092	skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_KERNEL);
2093	if (!skb)
2094		goto errout;
2095
2096	err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0,
2097					event, 0, type);
2098	if (err < 0) {
2099		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2100		WARN_ON(err == -EMSGSIZE);
2101		kfree_skb(skb);
2102		goto errout;
2103	}
2104	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_KERNEL);
2105	return;
2106errout:
2107	if (err < 0)
2108		rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err);
2109}
2110
2111static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
2112	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
2113	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
2114	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
2115	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
2116	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
2117};
2118
2119static int inet_netconf_valid_get_req(struct sk_buff *skb,
2120				      const struct nlmsghdr *nlh,
2121				      struct nlattr **tb,
2122				      struct netlink_ext_ack *extack)
2123{
2124	int i, err;
2125
2126	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
2127		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf get request");
2128		return -EINVAL;
2129	}
2130
2131	if (!netlink_strict_get_check(skb))
2132		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
2133					      tb, NETCONFA_MAX,
2134					      devconf_ipv4_policy, extack);
2135
2136	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
2137					    tb, NETCONFA_MAX,
2138					    devconf_ipv4_policy, extack);
2139	if (err)
2140		return err;
2141
2142	for (i = 0; i <= NETCONFA_MAX; i++) {
2143		if (!tb[i])
2144			continue;
2145
2146		switch (i) {
2147		case NETCONFA_IFINDEX:
2148			break;
2149		default:
2150			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in netconf get request");
2151			return -EINVAL;
2152		}
2153	}
2154
2155	return 0;
2156}
2157
2158static int inet_netconf_get_devconf(struct sk_buff *in_skb,
2159				    struct nlmsghdr *nlh,
2160				    struct netlink_ext_ack *extack)
2161{
2162	struct net *net = sock_net(in_skb->sk);
2163	struct nlattr *tb[NETCONFA_MAX+1];
2164	struct sk_buff *skb;
2165	struct ipv4_devconf *devconf;
2166	struct in_device *in_dev;
2167	struct net_device *dev;
2168	int ifindex;
2169	int err;
2170
2171	err = inet_netconf_valid_get_req(in_skb, nlh, tb, extack);
2172	if (err)
2173		goto errout;
2174
2175	err = -EINVAL;
2176	if (!tb[NETCONFA_IFINDEX])
2177		goto errout;
2178
2179	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
2180	switch (ifindex) {
2181	case NETCONFA_IFINDEX_ALL:
2182		devconf = net->ipv4.devconf_all;
2183		break;
2184	case NETCONFA_IFINDEX_DEFAULT:
2185		devconf = net->ipv4.devconf_dflt;
2186		break;
2187	default:
2188		dev = __dev_get_by_index(net, ifindex);
2189		if (!dev)
2190			goto errout;
2191		in_dev = __in_dev_get_rtnl(dev);
2192		if (!in_dev)
2193			goto errout;
2194		devconf = &in_dev->cnf;
2195		break;
2196	}
2197
2198	err = -ENOBUFS;
2199	skb = nlmsg_new(inet_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
2200	if (!skb)
2201		goto errout;
2202
2203	err = inet_netconf_fill_devconf(skb, ifindex, devconf,
2204					NETLINK_CB(in_skb).portid,
2205					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
2206					NETCONFA_ALL);
2207	if (err < 0) {
2208		/* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */
2209		WARN_ON(err == -EMSGSIZE);
2210		kfree_skb(skb);
2211		goto errout;
2212	}
2213	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2214errout:
2215	return err;
2216}
2217
2218static int inet_netconf_dump_devconf(struct sk_buff *skb,
2219				     struct netlink_callback *cb)
2220{
2221	const struct nlmsghdr *nlh = cb->nlh;
2222	struct net *net = sock_net(skb->sk);
2223	int h, s_h;
2224	int idx, s_idx;
2225	struct net_device *dev;
2226	struct in_device *in_dev;
2227	struct hlist_head *head;
2228
2229	if (cb->strict_check) {
2230		struct netlink_ext_ack *extack = cb->extack;
2231		struct netconfmsg *ncm;
2232
2233		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
2234			NL_SET_ERR_MSG(extack, "ipv4: Invalid header for netconf dump request");
2235			return -EINVAL;
2236		}
2237
2238		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
2239			NL_SET_ERR_MSG(extack, "ipv4: Invalid data after header in netconf dump request");
2240			return -EINVAL;
2241		}
2242	}
2243
2244	s_h = cb->args[0];
2245	s_idx = idx = cb->args[1];
2246
2247	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
2248		idx = 0;
2249		head = &net->dev_index_head[h];
2250		rcu_read_lock();
2251		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
2252			  net->dev_base_seq;
2253		hlist_for_each_entry_rcu(dev, head, index_hlist) {
2254			if (idx < s_idx)
2255				goto cont;
2256			in_dev = __in_dev_get_rcu(dev);
2257			if (!in_dev)
2258				goto cont;
2259
2260			if (inet_netconf_fill_devconf(skb, dev->ifindex,
2261						      &in_dev->cnf,
2262						      NETLINK_CB(cb->skb).portid,
2263						      nlh->nlmsg_seq,
2264						      RTM_NEWNETCONF,
2265						      NLM_F_MULTI,
2266						      NETCONFA_ALL) < 0) {
2267				rcu_read_unlock();
2268				goto done;
2269			}
2270			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2271cont:
2272			idx++;
2273		}
2274		rcu_read_unlock();
2275	}
2276	if (h == NETDEV_HASHENTRIES) {
2277		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
2278					      net->ipv4.devconf_all,
2279					      NETLINK_CB(cb->skb).portid,
2280					      nlh->nlmsg_seq,
2281					      RTM_NEWNETCONF, NLM_F_MULTI,
2282					      NETCONFA_ALL) < 0)
2283			goto done;
2284		else
2285			h++;
2286	}
2287	if (h == NETDEV_HASHENTRIES + 1) {
2288		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
2289					      net->ipv4.devconf_dflt,
2290					      NETLINK_CB(cb->skb).portid,
2291					      nlh->nlmsg_seq,
2292					      RTM_NEWNETCONF, NLM_F_MULTI,
2293					      NETCONFA_ALL) < 0)
2294			goto done;
2295		else
2296			h++;
2297	}
2298done:
2299	cb->args[0] = h;
2300	cb->args[1] = idx;
2301
2302	return skb->len;
2303}
2304
2305#ifdef CONFIG_SYSCTL
2306
2307static void devinet_copy_dflt_conf(struct net *net, int i)
2308{
2309	struct net_device *dev;
2310
2311	rcu_read_lock();
2312	for_each_netdev_rcu(net, dev) {
2313		struct in_device *in_dev;
2314
2315		in_dev = __in_dev_get_rcu(dev);
2316		if (in_dev && !test_bit(i, in_dev->cnf.state))
2317			in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
2318	}
2319	rcu_read_unlock();
2320}
2321
2322/* called with RTNL locked */
2323static void inet_forward_change(struct net *net)
2324{
2325	struct net_device *dev;
2326	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
2327
2328	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
2329	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
2330	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2331				    NETCONFA_FORWARDING,
2332				    NETCONFA_IFINDEX_ALL,
2333				    net->ipv4.devconf_all);
2334	inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2335				    NETCONFA_FORWARDING,
2336				    NETCONFA_IFINDEX_DEFAULT,
2337				    net->ipv4.devconf_dflt);
2338
2339	for_each_netdev(net, dev) {
2340		struct in_device *in_dev;
2341
2342		if (on)
2343			dev_disable_lro(dev);
2344
2345		in_dev = __in_dev_get_rtnl(dev);
2346		if (in_dev) {
2347			IN_DEV_CONF_SET(in_dev, FORWARDING, on);
2348			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2349						    NETCONFA_FORWARDING,
2350						    dev->ifindex, &in_dev->cnf);
2351		}
2352	}
2353}
2354
2355static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
2356{
2357	if (cnf == net->ipv4.devconf_dflt)
2358		return NETCONFA_IFINDEX_DEFAULT;
2359	else if (cnf == net->ipv4.devconf_all)
2360		return NETCONFA_IFINDEX_ALL;
2361	else {
2362		struct in_device *idev
2363			= container_of(cnf, struct in_device, cnf);
2364		return idev->dev->ifindex;
2365	}
2366}
2367
2368static int devinet_conf_proc(struct ctl_table *ctl, int write,
2369			     void __user *buffer,
2370			     size_t *lenp, loff_t *ppos)
2371{
2372	int old_value = *(int *)ctl->data;
2373	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2374	int new_value = *(int *)ctl->data;
2375
2376	if (write) {
2377		struct ipv4_devconf *cnf = ctl->extra1;
2378		struct net *net = ctl->extra2;
2379		int i = (int *)ctl->data - cnf->data;
2380		int ifindex;
2381
2382		set_bit(i, cnf->state);
2383
2384		if (cnf == net->ipv4.devconf_dflt)
2385			devinet_copy_dflt_conf(net, i);
2386		if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
2387		    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
2388			if ((new_value == 0) && (old_value != 0))
2389				rt_cache_flush(net);
2390
2391		if (i == IPV4_DEVCONF_BC_FORWARDING - 1 &&
2392		    new_value != old_value)
2393			rt_cache_flush(net);
2394
2395		if (i == IPV4_DEVCONF_RP_FILTER - 1 &&
2396		    new_value != old_value) {
2397			ifindex = devinet_conf_ifindex(net, cnf);
2398			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2399						    NETCONFA_RP_FILTER,
2400						    ifindex, cnf);
2401		}
2402		if (i == IPV4_DEVCONF_PROXY_ARP - 1 &&
2403		    new_value != old_value) {
2404			ifindex = devinet_conf_ifindex(net, cnf);
2405			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2406						    NETCONFA_PROXY_NEIGH,
2407						    ifindex, cnf);
2408		}
2409		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
2410		    new_value != old_value) {
2411			ifindex = devinet_conf_ifindex(net, cnf);
2412			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2413						    NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
2414						    ifindex, cnf);
2415		}
2416	}
2417
2418	return ret;
2419}
2420
2421static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
2422				  void __user *buffer,
2423				  size_t *lenp, loff_t *ppos)
2424{
2425	int *valp = ctl->data;
2426	int val = *valp;
2427	loff_t pos = *ppos;
2428	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2429
2430	if (write && *valp != val) {
2431		struct net *net = ctl->extra2;
2432
2433		if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
2434			if (!rtnl_trylock()) {
2435				/* Restore the original values before restarting */
2436				*valp = val;
2437				*ppos = pos;
2438				return restart_syscall();
2439			}
2440			if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
2441				inet_forward_change(net);
2442			} else {
2443				struct ipv4_devconf *cnf = ctl->extra1;
2444				struct in_device *idev =
2445					container_of(cnf, struct in_device, cnf);
2446				if (*valp)
2447					dev_disable_lro(idev->dev);
2448				inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2449							    NETCONFA_FORWARDING,
2450							    idev->dev->ifindex,
2451							    cnf);
2452			}
2453			rtnl_unlock();
2454			rt_cache_flush(net);
2455		} else
2456			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
2457						    NETCONFA_FORWARDING,
2458						    NETCONFA_IFINDEX_DEFAULT,
2459						    net->ipv4.devconf_dflt);
2460	}
2461
2462	return ret;
2463}
2464
2465static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
2466				void __user *buffer,
2467				size_t *lenp, loff_t *ppos)
2468{
2469	int *valp = ctl->data;
2470	int val = *valp;
2471	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2472	struct net *net = ctl->extra2;
2473
2474	if (write && *valp != val)
2475		rt_cache_flush(net);
2476
2477	return ret;
2478}
2479
/* One ctl_table initializer for devconf attribute @attr.  .data points
 * at slot IPV4_DEVCONF_<attr> - 1 of the template ipv4_devconf and
 * .extra1 at the template itself.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
	{ \
		.procname	= name, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.proc_handler	= proc, \
		.data		= ipv4_devconf.data + \
				  IPV4_DEVCONF_ ## attr - 1, \
		.extra1		= &ipv4_devconf, \
	}

/* Mode 0644, handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444, handled by devinet_conf_proc() */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 with a caller-supplied handler */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644, handled by ipv4_doint_and_flush() */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
2502
2503static struct devinet_sysctl_table {
2504	struct ctl_table_header *sysctl_header;
2505	struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
2506} devinet_sysctl = {
2507	.devinet_vars = {
2508		DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
2509					     devinet_sysctl_forward),
2510		DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
2511		DEVINET_SYSCTL_RW_ENTRY(BC_FORWARDING, "bc_forwarding"),
2512
2513		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
2514		DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
2515		DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
2516		DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
2517		DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
2518		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
2519					"accept_source_route"),
2520		DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
2521		DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
2522		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
2523		DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
2524		DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
2525		DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
2526		DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
2527		DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
2528		DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
2529		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
2530		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
2531		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
2532		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
2533		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
2534					"force_igmp_version"),
2535		DEVINET_SYSCTL_RW_ENTRY(IGMPV2_UNSOLICITED_REPORT_INTERVAL,
2536					"igmpv2_unsolicited_report_interval"),
2537		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
2538					"igmpv3_unsolicited_report_interval"),
2539		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
2540					"ignore_routes_with_linkdown"),
2541		DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP,
2542					"drop_gratuitous_arp"),
2543
2544		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
2545		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
2546		DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
2547					      "promote_secondaries"),
2548		DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
2549					      "route_localnet"),
2550		DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST,
2551					      "drop_unicast_in_l2_multicast"),
2552	},
2553};
2554
2555static int __devinet_sysctl_register(struct net *net, char *dev_name,
2556				     int ifindex, struct ipv4_devconf *p)
2557{
2558	int i;
2559	struct devinet_sysctl_table *t;
2560	char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
2561
2562	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
2563	if (!t)
2564		goto out;
2565
2566	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
2567		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
2568		t->devinet_vars[i].extra1 = p;
2569		t->devinet_vars[i].extra2 = net;
2570	}
2571
2572	snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
2573
2574	t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
2575	if (!t->sysctl_header)
2576		goto free;
2577
2578	p->sysctl = t;
2579
2580	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL,
2581				    ifindex, p);
2582	return 0;
2583
2584free:
2585	kfree(t);
2586out:
2587	return -ENOBUFS;
2588}
2589
2590static void __devinet_sysctl_unregister(struct net *net,
2591					struct ipv4_devconf *cnf, int ifindex)
2592{
2593	struct devinet_sysctl_table *t = cnf->sysctl;
2594
2595	if (t) {
2596		cnf->sysctl = NULL;
2597		unregister_net_sysctl_table(t->sysctl_header);
2598		kfree(t);
2599	}
2600
2601	inet_netconf_notify_devconf(net, RTM_DELNETCONF, 0, ifindex, NULL);
2602}
2603
2604static int devinet_sysctl_register(struct in_device *idev)
2605{
2606	int err;
2607
2608	if (!sysctl_dev_name_is_allowed(idev->dev->name))
2609		return -EINVAL;
2610
2611	err = neigh_sysctl_register(idev->dev, idev->arp_parms, NULL);
2612	if (err)
2613		return err;
2614	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
2615					idev->dev->ifindex, &idev->cnf);
2616	if (err)
2617		neigh_sysctl_unregister(idev->arp_parms);
2618	return err;
2619}
2620
2621static void devinet_sysctl_unregister(struct in_device *idev)
2622{
2623	struct net *net = dev_net(idev->dev);
2624
2625	__devinet_sysctl_unregister(net, &idev->cnf, idev->dev->ifindex);
2626	neigh_sysctl_unregister(idev->arp_parms);
2627}
2628
2629static struct ctl_table ctl_forward_entry[] = {
2630	{
2631		.procname	= "ip_forward",
2632		.data		= &ipv4_devconf.data[
2633					IPV4_DEVCONF_FORWARDING - 1],
2634		.maxlen		= sizeof(int),
2635		.mode		= 0644,
2636		.proc_handler	= devinet_sysctl_forward,
2637		.extra1		= &ipv4_devconf,
2638		.extra2		= &init_net,
2639	},
2640	{ },
2641};
2642#endif
2643
2644static __net_init int devinet_init_net(struct net *net)
2645{
2646	int err;
2647	struct ipv4_devconf *all, *dflt;
2648#ifdef CONFIG_SYSCTL
2649	struct ctl_table *tbl;
2650	struct ctl_table_header *forw_hdr;
2651#endif
2652
2653	err = -ENOMEM;
2654	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
2655	if (!all)
2656		goto err_alloc_all;
2657
2658	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
2659	if (!dflt)
2660		goto err_alloc_dflt;
2661
2662#ifdef CONFIG_SYSCTL
2663	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
2664	if (!tbl)
2665		goto err_alloc_ctl;
2666
2667	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
2668	tbl[0].extra1 = all;
2669	tbl[0].extra2 = net;
2670#endif
2671
2672	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
2673	     sysctl_devconf_inherit_init_net != 2) &&
2674	    !net_eq(net, &init_net)) {
2675		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
2676		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
2677	}
2678
2679#ifdef CONFIG_SYSCTL
2680	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
2681	if (err < 0)
2682		goto err_reg_all;
2683
2684	err = __devinet_sysctl_register(net, "default",
2685					NETCONFA_IFINDEX_DEFAULT, dflt);
2686	if (err < 0)
2687		goto err_reg_dflt;
2688
2689	err = -ENOMEM;
2690	forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
2691	if (!forw_hdr)
2692		goto err_reg_ctl;
2693	net->ipv4.forw_hdr = forw_hdr;
2694#endif
2695
2696	net->ipv4.devconf_all = all;
2697	net->ipv4.devconf_dflt = dflt;
2698	return 0;
2699
2700#ifdef CONFIG_SYSCTL
2701err_reg_ctl:
2702	__devinet_sysctl_unregister(net, dflt, NETCONFA_IFINDEX_DEFAULT);
2703err_reg_dflt:
2704	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
2705err_reg_all:
2706	kfree(tbl);
2707err_alloc_ctl:
2708#endif
2709	kfree(dflt);
2710err_alloc_dflt:
2711	kfree(all);
2712err_alloc_all:
2713	return err;
2714}
2715
2716static __net_exit void devinet_exit_net(struct net *net)
2717{
2718#ifdef CONFIG_SYSCTL
2719	struct ctl_table *tbl;
2720
2721	tbl = net->ipv4.forw_hdr->ctl_table_arg;
2722	unregister_net_sysctl_table(net->ipv4.forw_hdr);
2723	__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
2724				    NETCONFA_IFINDEX_DEFAULT);
2725	__devinet_sysctl_unregister(net, net->ipv4.devconf_all,
2726				    NETCONFA_IFINDEX_ALL);
2727	kfree(tbl);
2728#endif
2729	kfree(net->ipv4.devconf_dflt);
2730	kfree(net->ipv4.devconf_all);
2731}
2732
2733static __net_initdata struct pernet_operations devinet_ops = {
2734	.init = devinet_init_net,
2735	.exit = devinet_exit_net,
2736};
2737
2738static struct rtnl_af_ops inet_af_ops __read_mostly = {
2739	.family		  = AF_INET,
2740	.fill_link_af	  = inet_fill_link_af,
2741	.get_link_af_size = inet_get_link_af_size,
2742	.validate_link_af = inet_validate_link_af,
2743	.set_link_af	  = inet_set_link_af,
2744};
2745
2746void __init devinet_init(void)
2747{
2748	int i;
2749
2750	for (i = 0; i < IN4_ADDR_HSIZE; i++)
2751		INIT_HLIST_HEAD(&inet_addr_lst[i]);
2752
2753	register_pernet_subsys(&devinet_ops);
2754
2755	register_gifconf(PF_INET, inet_gifconf);
2756	register_netdevice_notifier(&ip_netdev_notifier);
2757
2758	queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2759
2760	rtnl_af_register(&inet_af_ops);
2761
2762	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);
2763	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0);
2764	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0);
2765	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2766		      inet_netconf_dump_devconf, 0);
2767}