Linux Audio

Check our new training course

Loading...
v3.1
  1/*
  2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
  3 *		operating system.  INET is implemented using the  BSD Socket
  4 *		interface as the means of communication with the user level.
  5 *
  6 *		"Ping" sockets
  7 *
  8 *		This program is free software; you can redistribute it and/or
  9 *		modify it under the terms of the GNU General Public License
 10 *		as published by the Free Software Foundation; either version
 11 *		2 of the License, or (at your option) any later version.
 12 *
 13 * Based on ipv4/udp.c code.
 14 *
 15 * Authors:	Vasiliy Kulikov / Openwall (for Linux 2.6),
 16 *		Pavel Kankovsky (for Linux 2.4.32)
 17 *
 18 * Pavel gave all rights to bugs to Vasiliy,
 19 * none of the bugs are Pavel's now.
 20 *
 21 */
 22
 23#include <asm/system.h>
 24#include <linux/uaccess.h>
 25#include <linux/types.h>
 26#include <linux/fcntl.h>
 27#include <linux/socket.h>
 28#include <linux/sockios.h>
 29#include <linux/in.h>
 30#include <linux/errno.h>
 31#include <linux/timer.h>
 32#include <linux/mm.h>
 33#include <linux/inet.h>
 34#include <linux/netdevice.h>
 35#include <net/snmp.h>
 36#include <net/ip.h>
 37#include <net/ipv6.h>
 38#include <net/icmp.h>
 39#include <net/protocol.h>
 40#include <linux/skbuff.h>
 41#include <linux/proc_fs.h>
 
 42#include <net/sock.h>
 43#include <net/ping.h>
 44#include <net/udp.h>
 45#include <net/route.h>
 46#include <net/inet_common.h>
 47#include <net/checksum.h>
 48
 49
 50static struct ping_table ping_table;
 51
 52static u16 ping_port_rover;
 53
 54static inline int ping_hashfn(struct net *net, unsigned num, unsigned mask)
 55{
 56	int res = (num + net_hash_mix(net)) & mask;
 
 57	pr_debug("hash(%d) = %d\n", num, res);
 58	return res;
 59}
 60
 61static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
 62					     struct net *net, unsigned num)
 63{
 64	return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
 65}
 66
 67static int ping_v4_get_port(struct sock *sk, unsigned short ident)
 68{
 69	struct hlist_nulls_node *node;
 70	struct hlist_nulls_head *hlist;
 71	struct inet_sock *isk, *isk2;
 72	struct sock *sk2 = NULL;
 73
 74	isk = inet_sk(sk);
 75	write_lock_bh(&ping_table.lock);
 76	if (ident == 0) {
 77		u32 i;
 78		u16 result = ping_port_rover + 1;
 79
 80		for (i = 0; i < (1L << 16); i++, result++) {
 81			if (!result)
 82				result++; /* avoid zero */
 83			hlist = ping_hashslot(&ping_table, sock_net(sk),
 84					    result);
 85			ping_portaddr_for_each_entry(sk2, node, hlist) {
 86				isk2 = inet_sk(sk2);
 87
 88				if (isk2->inet_num == result)
 89					goto next_port;
 90			}
 91
 92			/* found */
 93			ping_port_rover = ident = result;
 94			break;
 95next_port:
 96			;
 97		}
 98		if (i >= (1L << 16))
 99			goto fail;
100	} else {
101		hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
102		ping_portaddr_for_each_entry(sk2, node, hlist) {
103			isk2 = inet_sk(sk2);
104
105			if ((isk2->inet_num == ident) &&
106			    (sk2 != sk) &&
107			    (!sk2->sk_reuse || !sk->sk_reuse))
108				goto fail;
109		}
110	}
111
112	pr_debug("found port/ident = %d\n", ident);
113	isk->inet_num = ident;
114	if (sk_unhashed(sk)) {
115		pr_debug("was not hashed\n");
116		sock_hold(sk);
117		hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
118		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
119	}
120	write_unlock_bh(&ping_table.lock);
121	return 0;
122
123fail:
124	write_unlock_bh(&ping_table.lock);
125	return 1;
126}
127
128static void ping_v4_hash(struct sock *sk)
129{
130	pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
131	BUG(); /* "Please do not press this button again." */
132}
133
134static void ping_v4_unhash(struct sock *sk)
135{
136	struct inet_sock *isk = inet_sk(sk);
137	pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
138	if (sk_hashed(sk)) {
139		write_lock_bh(&ping_table.lock);
140		hlist_nulls_del(&sk->sk_nulls_node);
141		sock_put(sk);
142		isk->inet_num = isk->inet_sport = 0;
 
143		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
144		write_unlock_bh(&ping_table.lock);
145	}
146}
147
148static struct sock *ping_v4_lookup(struct net *net, u32 saddr, u32 daddr,
149				   u16 ident, int dif)
150{
151	struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
152	struct sock *sk = NULL;
153	struct inet_sock *isk;
154	struct hlist_nulls_node *hnode;
155
156	pr_debug("try to find: num = %d, daddr = %ld, dif = %d\n",
157			 (int)ident, (unsigned long)daddr, dif);
158	read_lock_bh(&ping_table.lock);
159
160	ping_portaddr_for_each_entry(sk, hnode, hslot) {
161		isk = inet_sk(sk);
162
163		pr_debug("found: %p: num = %d, daddr = %ld, dif = %d\n", sk,
164			 (int)isk->inet_num, (unsigned long)isk->inet_rcv_saddr,
165			 sk->sk_bound_dev_if);
166
167		pr_debug("iterate\n");
168		if (isk->inet_num != ident)
169			continue;
170		if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
171			continue;
172		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
173			continue;
174
175		sock_hold(sk);
176		goto exit;
177	}
178
179	sk = NULL;
180exit:
181	read_unlock_bh(&ping_table.lock);
182
183	return sk;
184}
185
186static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
187					  gid_t *high)
188{
189	gid_t *data = net->ipv4.sysctl_ping_group_range;
190	unsigned seq;
 
191	do {
192		seq = read_seqbegin(&sysctl_local_ports.lock);
193
194		*low = data[0];
195		*high = data[1];
196	} while (read_seqretry(&sysctl_local_ports.lock, seq));
197}
198
199
200static int ping_init_sock(struct sock *sk)
201{
202	struct net *net = sock_net(sk);
203	gid_t group = current_egid();
204	gid_t range[2];
205	struct group_info *group_info = get_current_groups();
206	int i, j, count = group_info->ngroups;
 
207
208	inet_get_ping_group_range_net(net, range, range+1);
 
 
 
 
 
209	if (range[0] <= group && group <= range[1])
210		return 0;
211
212	for (i = 0; i < group_info->nblocks; i++) {
213		int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
214
215		for (j = 0; j < cp_count; j++) {
216			group = group_info->blocks[i][j];
217			if (range[0] <= group && group <= range[1])
218				return 0;
219		}
220
221		count -= cp_count;
222	}
223
224	return -EACCES;
225}
226
227static void ping_close(struct sock *sk, long timeout)
228{
229	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
230		inet_sk(sk), inet_sk(sk)->inet_num);
231	pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
232
233	sk_common_release(sk);
234}
235
236/*
237 * We need our own bind because there are no privileged id's == local ports.
238 * Moreover, we don't allow binding to multi- and broadcast addresses.
239 */
240
241static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
242{
243	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
244	struct inet_sock *isk = inet_sk(sk);
245	unsigned short snum;
246	int chk_addr_ret;
247	int err;
248
249	if (addr_len < sizeof(struct sockaddr_in))
250		return -EINVAL;
251
252	pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
253		sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
254
255	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
256	if (addr->sin_addr.s_addr == INADDR_ANY)
257		chk_addr_ret = RTN_LOCAL;
258
259	if ((sysctl_ip_nonlocal_bind == 0 &&
260	    isk->freebind == 0 && isk->transparent == 0 &&
261	     chk_addr_ret != RTN_LOCAL) ||
262	    chk_addr_ret == RTN_MULTICAST ||
263	    chk_addr_ret == RTN_BROADCAST)
264		return -EADDRNOTAVAIL;
265
266	lock_sock(sk);
267
268	err = -EINVAL;
269	if (isk->inet_num != 0)
270		goto out;
271
272	err = -EADDRINUSE;
273	isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
274	snum = ntohs(addr->sin_port);
275	if (ping_v4_get_port(sk, snum) != 0) {
276		isk->inet_saddr = isk->inet_rcv_saddr = 0;
277		goto out;
278	}
279
280	pr_debug("after bind(): num = %d, daddr = %ld, dif = %d\n",
281		(int)isk->inet_num,
282		(unsigned long) isk->inet_rcv_saddr,
283		(int)sk->sk_bound_dev_if);
284
285	err = 0;
286	if (isk->inet_rcv_saddr)
287		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
288	if (snum)
289		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
290	isk->inet_sport = htons(isk->inet_num);
291	isk->inet_daddr = 0;
292	isk->inet_dport = 0;
293	sk_dst_reset(sk);
294out:
295	release_sock(sk);
296	pr_debug("ping_v4_bind -> %d\n", err);
297	return err;
298}
299
300/*
301 * Is this a supported type of ICMP message?
302 */
303
304static inline int ping_supported(int type, int code)
305{
306	if (type == ICMP_ECHO && code == 0)
307		return 1;
308	return 0;
309}
310
311/*
312 * This routine is called by the ICMP module when it gets some
313 * sort of error condition.
314 */
315
316static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
317
318void ping_err(struct sk_buff *skb, u32 info)
319{
320	struct iphdr *iph = (struct iphdr *)skb->data;
321	struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
322	struct inet_sock *inet_sock;
323	int type = icmph->type;
324	int code = icmph->code;
325	struct net *net = dev_net(skb->dev);
326	struct sock *sk;
327	int harderr;
328	int err;
329
330	/* We assume the packet has already been checked by icmp_unreach */
331
332	if (!ping_supported(icmph->type, icmph->code))
333		return;
334
335	pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
336		code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
337
338	sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
339			    ntohs(icmph->un.echo.id), skb->dev->ifindex);
340	if (sk == NULL) {
341		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
342		pr_debug("no socket, dropping\n");
343		return;	/* No socket for error */
344	}
345	pr_debug("err on socket %p\n", sk);
346
347	err = 0;
348	harderr = 0;
349	inet_sock = inet_sk(sk);
350
351	switch (type) {
352	default:
353	case ICMP_TIME_EXCEEDED:
354		err = EHOSTUNREACH;
355		break;
356	case ICMP_SOURCE_QUENCH:
357		/* This is not a real error but ping wants to see it.
358		 * Report it with some fake errno. */
359		err = EREMOTEIO;
360		break;
361	case ICMP_PARAMETERPROB:
362		err = EPROTO;
363		harderr = 1;
364		break;
365	case ICMP_DEST_UNREACH:
366		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
367			if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
368				err = EMSGSIZE;
369				harderr = 1;
370				break;
371			}
372			goto out;
373		}
374		err = EHOSTUNREACH;
375		if (code <= NR_ICMP_UNREACH) {
376			harderr = icmp_err_convert[code].fatal;
377			err = icmp_err_convert[code].errno;
378		}
379		break;
380	case ICMP_REDIRECT:
381		/* See ICMP_SOURCE_QUENCH */
382		err = EREMOTEIO;
383		break;
384	}
385
386	/*
387	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
388	 *	4.1.3.3.
389	 */
390	if (!inet_sock->recverr) {
391		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
392			goto out;
393	} else {
394		ip_icmp_error(sk, skb, err, 0 /* no remote port */,
395			 info, (u8 *)icmph);
396	}
397	sk->sk_err = err;
398	sk->sk_error_report(sk);
399out:
400	sock_put(sk);
401}
402
403/*
404 *	Copy and checksum an ICMP Echo packet from user space into a buffer.
405 */
406
407struct pingfakehdr {
408	struct icmphdr icmph;
409	struct iovec *iov;
410	u32 wcheck;
411};
412
413static int ping_getfrag(void *from, char * to,
414			int offset, int fraglen, int odd, struct sk_buff *skb)
415{
416	struct pingfakehdr *pfh = (struct pingfakehdr *)from;
417
418	if (offset == 0) {
419		if (fraglen < sizeof(struct icmphdr))
420			BUG();
421		if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
422			    pfh->iov, 0, fraglen - sizeof(struct icmphdr),
423			    &pfh->wcheck))
424			return -EFAULT;
425
426		return 0;
427	}
428	if (offset < sizeof(struct icmphdr))
429		BUG();
430	if (csum_partial_copy_fromiovecend
431			(to, pfh->iov, offset - sizeof(struct icmphdr),
432			 fraglen, &pfh->wcheck))
433		return -EFAULT;
434	return 0;
435}
436
437static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
438				    struct flowi4 *fl4)
439{
440	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
441
442	pfh->wcheck = csum_partial((char *)&pfh->icmph,
443		sizeof(struct icmphdr), pfh->wcheck);
444	pfh->icmph.checksum = csum_fold(pfh->wcheck);
445	memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
446	skb->ip_summed = CHECKSUM_NONE;
447	return ip_push_pending_frames(sk, fl4);
448}
449
450static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
451			size_t len)
452{
453	struct net *net = sock_net(sk);
454	struct flowi4 fl4;
455	struct inet_sock *inet = inet_sk(sk);
456	struct ipcm_cookie ipc;
457	struct icmphdr user_icmph;
458	struct pingfakehdr pfh;
459	struct rtable *rt = NULL;
460	struct ip_options_data opt_copy;
461	int free = 0;
462	u32 saddr, daddr, faddr;
463	u8  tos;
464	int err;
465
466	pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
467
468
469	if (len > 0xFFFF)
470		return -EMSGSIZE;
471
472	/*
473	 *	Check the flags.
474	 */
475
476	/* Mirror BSD error message compatibility */
477	if (msg->msg_flags & MSG_OOB)
478		return -EOPNOTSUPP;
479
480	/*
481	 *	Fetch the ICMP header provided by the userland.
482	 *	iovec is modified!
483	 */
484
485	if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
486			     sizeof(struct icmphdr)))
487		return -EFAULT;
488	if (!ping_supported(user_icmph.type, user_icmph.code))
489		return -EINVAL;
490
491	/*
492	 *	Get and verify the address.
493	 */
494
495	if (msg->msg_name) {
496		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
497		if (msg->msg_namelen < sizeof(*usin))
498			return -EINVAL;
499		if (usin->sin_family != AF_INET)
500			return -EINVAL;
501		daddr = usin->sin_addr.s_addr;
502		/* no remote port */
503	} else {
504		if (sk->sk_state != TCP_ESTABLISHED)
505			return -EDESTADDRREQ;
506		daddr = inet->inet_daddr;
507		/* no remote port */
508	}
509
510	ipc.addr = inet->inet_saddr;
511	ipc.opt = NULL;
512	ipc.oif = sk->sk_bound_dev_if;
513	ipc.tx_flags = 0;
514	err = sock_tx_timestamp(sk, &ipc.tx_flags);
515	if (err)
516		return err;
517
518	if (msg->msg_controllen) {
519		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
520		if (err)
521			return err;
522		if (ipc.opt)
523			free = 1;
524	}
525	if (!ipc.opt) {
526		struct ip_options_rcu *inet_opt;
527
528		rcu_read_lock();
529		inet_opt = rcu_dereference(inet->inet_opt);
530		if (inet_opt) {
531			memcpy(&opt_copy, inet_opt,
532			       sizeof(*inet_opt) + inet_opt->opt.optlen);
533			ipc.opt = &opt_copy.opt;
534		}
535		rcu_read_unlock();
536	}
537
538	saddr = ipc.addr;
539	ipc.addr = faddr = daddr;
540
541	if (ipc.opt && ipc.opt->opt.srr) {
542		if (!daddr)
543			return -EINVAL;
544		faddr = ipc.opt->opt.faddr;
545	}
546	tos = RT_TOS(inet->tos);
547	if (sock_flag(sk, SOCK_LOCALROUTE) ||
548	    (msg->msg_flags & MSG_DONTROUTE) ||
549	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
550		tos |= RTO_ONLINK;
551	}
552
553	if (ipv4_is_multicast(daddr)) {
554		if (!ipc.oif)
555			ipc.oif = inet->mc_index;
556		if (!saddr)
557			saddr = inet->mc_addr;
558	}
 
559
560	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
561			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
562			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
563
564	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
565	rt = ip_route_output_flow(net, &fl4, sk);
566	if (IS_ERR(rt)) {
567		err = PTR_ERR(rt);
568		rt = NULL;
569		if (err == -ENETUNREACH)
570			IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
571		goto out;
572	}
573
574	err = -EACCES;
575	if ((rt->rt_flags & RTCF_BROADCAST) &&
576	    !sock_flag(sk, SOCK_BROADCAST))
577		goto out;
578
579	if (msg->msg_flags & MSG_CONFIRM)
580		goto do_confirm;
581back_from_confirm:
582
583	if (!ipc.addr)
584		ipc.addr = fl4.daddr;
585
586	lock_sock(sk);
587
588	pfh.icmph.type = user_icmph.type; /* already checked */
589	pfh.icmph.code = user_icmph.code; /* ditto */
590	pfh.icmph.checksum = 0;
591	pfh.icmph.un.echo.id = inet->inet_sport;
592	pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
593	pfh.iov = msg->msg_iov;
594	pfh.wcheck = 0;
595
596	err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
597			0, &ipc, &rt, msg->msg_flags);
598	if (err)
599		ip_flush_pending_frames(sk);
600	else
601		err = ping_push_pending_frames(sk, &pfh, &fl4);
602	release_sock(sk);
603
604out:
605	ip_rt_put(rt);
606	if (free)
607		kfree(ipc.opt);
608	if (!err) {
609		icmp_out_count(sock_net(sk), user_icmph.type);
610		return len;
611	}
612	return err;
613
614do_confirm:
615	dst_confirm(&rt->dst);
616	if (!(msg->msg_flags & MSG_PROBE) || len)
617		goto back_from_confirm;
618	err = 0;
619	goto out;
620}
621
622static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
623			size_t len, int noblock, int flags, int *addr_len)
624{
625	struct inet_sock *isk = inet_sk(sk);
626	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
627	struct sk_buff *skb;
628	int copied, err;
629
630	pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
631
 
632	if (flags & MSG_OOB)
633		goto out;
634
635	if (addr_len)
636		*addr_len = sizeof(*sin);
637
638	if (flags & MSG_ERRQUEUE)
639		return ip_recv_error(sk, msg, len);
640
641	skb = skb_recv_datagram(sk, flags, noblock, &err);
642	if (!skb)
643		goto out;
644
645	copied = skb->len;
646	if (copied > len) {
647		msg->msg_flags |= MSG_TRUNC;
648		copied = len;
649	}
650
651	/* Don't bother checking the checksum */
652	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
653	if (err)
654		goto done;
655
656	sock_recv_timestamp(msg, sk, skb);
657
658	/* Copy the address. */
659	if (sin) {
660		sin->sin_family = AF_INET;
661		sin->sin_port = 0 /* skb->h.uh->source */;
662		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
663		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
664	}
665	if (isk->cmsg_flags)
666		ip_cmsg_recv(msg, skb);
667	err = copied;
668
669done:
670	skb_free_datagram(sk, skb);
671out:
672	pr_debug("ping_recvmsg -> %d\n", err);
673	return err;
674}
675
676static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
677{
678	pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
679		inet_sk(sk), inet_sk(sk)->inet_num, skb);
680	if (sock_queue_rcv_skb(sk, skb) < 0) {
681		ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_INERRORS);
682		kfree_skb(skb);
683		pr_debug("ping_queue_rcv_skb -> failed\n");
684		return -1;
685	}
686	return 0;
687}
688
689
690/*
691 *	All we need to do is get the socket.
692 */
693
694void ping_rcv(struct sk_buff *skb)
695{
696	struct sock *sk;
697	struct net *net = dev_net(skb->dev);
698	struct iphdr *iph = ip_hdr(skb);
699	struct icmphdr *icmph = icmp_hdr(skb);
700	u32 saddr = iph->saddr;
701	u32 daddr = iph->daddr;
702
703	/* We assume the packet has already been checked by icmp_rcv */
704
705	pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
706		skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
707
708	/* Push ICMP header back */
709	skb_push(skb, skb->data - (u8 *)icmph);
710
711	sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
712			    skb->dev->ifindex);
713	if (sk != NULL) {
714		pr_debug("rcv on socket %p\n", sk);
715		ping_queue_rcv_skb(sk, skb_get(skb));
716		sock_put(sk);
717		return;
718	}
719	pr_debug("no socket, dropping\n");
720
721	/* We're called from icmp_rcv(). kfree_skb() is done there. */
722}
723
724struct proto ping_prot = {
725	.name =		"PING",
726	.owner =	THIS_MODULE,
727	.init =		ping_init_sock,
728	.close =	ping_close,
729	.connect =	ip4_datagram_connect,
730	.disconnect =	udp_disconnect,
731	.setsockopt =	ip_setsockopt,
732	.getsockopt =	ip_getsockopt,
733	.sendmsg =	ping_sendmsg,
734	.recvmsg =	ping_recvmsg,
735	.bind =		ping_bind,
736	.backlog_rcv =	ping_queue_rcv_skb,
737	.hash =		ping_v4_hash,
738	.unhash =	ping_v4_unhash,
739	.get_port =	ping_v4_get_port,
740	.obj_size =	sizeof(struct inet_sock),
741};
742EXPORT_SYMBOL(ping_prot);
743
744#ifdef CONFIG_PROC_FS
745
746static struct sock *ping_get_first(struct seq_file *seq, int start)
747{
748	struct sock *sk;
749	struct ping_iter_state *state = seq->private;
750	struct net *net = seq_file_net(seq);
751
752	for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
753	     ++state->bucket) {
754		struct hlist_nulls_node *node;
755		struct hlist_nulls_head *hslot;
756
757		hslot = &ping_table.hash[state->bucket];
758
759		if (hlist_nulls_empty(hslot))
760			continue;
761
762		sk_nulls_for_each(sk, node, hslot) {
763			if (net_eq(sock_net(sk), net))
764				goto found;
765		}
766	}
767	sk = NULL;
768found:
769	return sk;
770}
771
772static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
773{
774	struct ping_iter_state *state = seq->private;
775	struct net *net = seq_file_net(seq);
776
777	do {
778		sk = sk_nulls_next(sk);
779	} while (sk && (!net_eq(sock_net(sk), net)));
780
781	if (!sk)
782		return ping_get_first(seq, state->bucket + 1);
783	return sk;
784}
785
786static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
787{
788	struct sock *sk = ping_get_first(seq, 0);
789
790	if (sk)
791		while (pos && (sk = ping_get_next(seq, sk)) != NULL)
792			--pos;
793	return pos ? NULL : sk;
794}
795
796static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
797{
798	struct ping_iter_state *state = seq->private;
799	state->bucket = 0;
800
801	read_lock_bh(&ping_table.lock);
802
803	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
804}
805
806static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
807{
808	struct sock *sk;
809
810	if (v == SEQ_START_TOKEN)
811		sk = ping_get_idx(seq, 0);
812	else
813		sk = ping_get_next(seq, v);
814
815	++*pos;
816	return sk;
817}
818
819static void ping_seq_stop(struct seq_file *seq, void *v)
820{
821	read_unlock_bh(&ping_table.lock);
822}
823
824static void ping_format_sock(struct sock *sp, struct seq_file *f,
825		int bucket, int *len)
826{
827	struct inet_sock *inet = inet_sk(sp);
828	__be32 dest = inet->inet_daddr;
829	__be32 src = inet->inet_rcv_saddr;
830	__u16 destp = ntohs(inet->inet_dport);
831	__u16 srcp = ntohs(inet->inet_sport);
832
833	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
834		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
835		bucket, src, srcp, dest, destp, sp->sk_state,
836		sk_wmem_alloc_get(sp),
837		sk_rmem_alloc_get(sp),
838		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
839		atomic_read(&sp->sk_refcnt), sp,
840		atomic_read(&sp->sk_drops), len);
841}
842
843static int ping_seq_show(struct seq_file *seq, void *v)
844{
845	if (v == SEQ_START_TOKEN)
846		seq_printf(seq, "%-127s\n",
847			   "  sl  local_address rem_address   st tx_queue "
848			   "rx_queue tr tm->when retrnsmt   uid  timeout "
849			   "inode ref pointer drops");
850	else {
851		struct ping_iter_state *state = seq->private;
852		int len;
853
854		ping_format_sock(v, seq, state->bucket, &len);
855		seq_printf(seq, "%*s\n", 127 - len, "");
856	}
857	return 0;
858}
859
860static const struct seq_operations ping_seq_ops = {
861	.show		= ping_seq_show,
862	.start		= ping_seq_start,
863	.next		= ping_seq_next,
864	.stop		= ping_seq_stop,
865};
866
867static int ping_seq_open(struct inode *inode, struct file *file)
868{
869	return seq_open_net(inode, file, &ping_seq_ops,
870			   sizeof(struct ping_iter_state));
871}
872
873static const struct file_operations ping_seq_fops = {
874	.open		= ping_seq_open,
875	.read		= seq_read,
876	.llseek		= seq_lseek,
877	.release	= seq_release_net,
878};
879
880static int ping_proc_register(struct net *net)
881{
882	struct proc_dir_entry *p;
883	int rc = 0;
884
885	p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
886	if (!p)
887		rc = -ENOMEM;
888	return rc;
889}
890
891static void ping_proc_unregister(struct net *net)
892{
893	proc_net_remove(net, "icmp");
894}
895
896
897static int __net_init ping_proc_init_net(struct net *net)
898{
899	return ping_proc_register(net);
900}
901
902static void __net_exit ping_proc_exit_net(struct net *net)
903{
904	ping_proc_unregister(net);
905}
906
907static struct pernet_operations ping_net_ops = {
908	.init = ping_proc_init_net,
909	.exit = ping_proc_exit_net,
910};
911
912int __init ping_proc_init(void)
913{
914	return register_pernet_subsys(&ping_net_ops);
915}
916
917void ping_proc_exit(void)
918{
919	unregister_pernet_subsys(&ping_net_ops);
920}
921
922#endif
923
924void __init ping_init(void)
925{
926	int i;
927
928	for (i = 0; i < PING_HTABLE_SIZE; i++)
929		INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
930	rwlock_init(&ping_table.lock);
931}
v3.5.6
  1/*
  2 * INET		An implementation of the TCP/IP protocol suite for the LINUX
  3 *		operating system.  INET is implemented using the  BSD Socket
  4 *		interface as the means of communication with the user level.
  5 *
  6 *		"Ping" sockets
  7 *
  8 *		This program is free software; you can redistribute it and/or
  9 *		modify it under the terms of the GNU General Public License
 10 *		as published by the Free Software Foundation; either version
 11 *		2 of the License, or (at your option) any later version.
 12 *
 13 * Based on ipv4/udp.c code.
 14 *
 15 * Authors:	Vasiliy Kulikov / Openwall (for Linux 2.6),
 16 *		Pavel Kankovsky (for Linux 2.4.32)
 17 *
 18 * Pavel gave all rights to bugs to Vasiliy,
 19 * none of the bugs are Pavel's now.
 20 *
 21 */
 22
 
 23#include <linux/uaccess.h>
 24#include <linux/types.h>
 25#include <linux/fcntl.h>
 26#include <linux/socket.h>
 27#include <linux/sockios.h>
 28#include <linux/in.h>
 29#include <linux/errno.h>
 30#include <linux/timer.h>
 31#include <linux/mm.h>
 32#include <linux/inet.h>
 33#include <linux/netdevice.h>
 34#include <net/snmp.h>
 35#include <net/ip.h>
 36#include <net/ipv6.h>
 37#include <net/icmp.h>
 38#include <net/protocol.h>
 39#include <linux/skbuff.h>
 40#include <linux/proc_fs.h>
 41#include <linux/export.h>
 42#include <net/sock.h>
 43#include <net/ping.h>
 44#include <net/udp.h>
 45#include <net/route.h>
 46#include <net/inet_common.h>
 47#include <net/checksum.h>
 48
 49
 50static struct ping_table ping_table;
 51
 52static u16 ping_port_rover;
 53
 54static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int mask)
 55{
 56	int res = (num + net_hash_mix(net)) & mask;
 57
 58	pr_debug("hash(%d) = %d\n", num, res);
 59	return res;
 60}
 61
 62static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
 63					     struct net *net, unsigned int num)
 64{
 65	return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
 66}
 67
 68static int ping_v4_get_port(struct sock *sk, unsigned short ident)
 69{
 70	struct hlist_nulls_node *node;
 71	struct hlist_nulls_head *hlist;
 72	struct inet_sock *isk, *isk2;
 73	struct sock *sk2 = NULL;
 74
 75	isk = inet_sk(sk);
 76	write_lock_bh(&ping_table.lock);
 77	if (ident == 0) {
 78		u32 i;
 79		u16 result = ping_port_rover + 1;
 80
 81		for (i = 0; i < (1L << 16); i++, result++) {
 82			if (!result)
 83				result++; /* avoid zero */
 84			hlist = ping_hashslot(&ping_table, sock_net(sk),
 85					    result);
 86			ping_portaddr_for_each_entry(sk2, node, hlist) {
 87				isk2 = inet_sk(sk2);
 88
 89				if (isk2->inet_num == result)
 90					goto next_port;
 91			}
 92
 93			/* found */
 94			ping_port_rover = ident = result;
 95			break;
 96next_port:
 97			;
 98		}
 99		if (i >= (1L << 16))
100			goto fail;
101	} else {
102		hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
103		ping_portaddr_for_each_entry(sk2, node, hlist) {
104			isk2 = inet_sk(sk2);
105
106			if ((isk2->inet_num == ident) &&
107			    (sk2 != sk) &&
108			    (!sk2->sk_reuse || !sk->sk_reuse))
109				goto fail;
110		}
111	}
112
113	pr_debug("found port/ident = %d\n", ident);
114	isk->inet_num = ident;
115	if (sk_unhashed(sk)) {
116		pr_debug("was not hashed\n");
117		sock_hold(sk);
118		hlist_nulls_add_head(&sk->sk_nulls_node, hlist);
119		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
120	}
121	write_unlock_bh(&ping_table.lock);
122	return 0;
123
124fail:
125	write_unlock_bh(&ping_table.lock);
126	return 1;
127}
128
129static void ping_v4_hash(struct sock *sk)
130{
131	pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num);
132	BUG(); /* "Please do not press this button again." */
133}
134
135static void ping_v4_unhash(struct sock *sk)
136{
137	struct inet_sock *isk = inet_sk(sk);
138	pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
139	if (sk_hashed(sk)) {
140		write_lock_bh(&ping_table.lock);
141		hlist_nulls_del(&sk->sk_nulls_node);
142		sock_put(sk);
143		isk->inet_num = 0;
144		isk->inet_sport = 0;
145		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
146		write_unlock_bh(&ping_table.lock);
147	}
148}
149
150static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr,
151				   u16 ident, int dif)
152{
153	struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
154	struct sock *sk = NULL;
155	struct inet_sock *isk;
156	struct hlist_nulls_node *hnode;
157
158	pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
159		 (int)ident, &daddr, dif);
160	read_lock_bh(&ping_table.lock);
161
162	ping_portaddr_for_each_entry(sk, hnode, hslot) {
163		isk = inet_sk(sk);
164
165		pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk,
166			 (int)isk->inet_num, &isk->inet_rcv_saddr,
167			 sk->sk_bound_dev_if);
168
169		pr_debug("iterate\n");
170		if (isk->inet_num != ident)
171			continue;
172		if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr)
173			continue;
174		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
175			continue;
176
177		sock_hold(sk);
178		goto exit;
179	}
180
181	sk = NULL;
182exit:
183	read_unlock_bh(&ping_table.lock);
184
185	return sk;
186}
187
188static void inet_get_ping_group_range_net(struct net *net, gid_t *low,
189					  gid_t *high)
190{
191	gid_t *data = net->ipv4.sysctl_ping_group_range;
192	unsigned int seq;
193
194	do {
195		seq = read_seqbegin(&sysctl_local_ports.lock);
196
197		*low = data[0];
198		*high = data[1];
199	} while (read_seqretry(&sysctl_local_ports.lock, seq));
200}
201
202
203static int ping_init_sock(struct sock *sk)
204{
205	struct net *net = sock_net(sk);
206	gid_t group = current_egid();
207	gid_t range[2];
208	struct group_info *group_info = get_current_groups();
209	int i, j, count = group_info->ngroups;
210	kgid_t low, high;
211
212	inet_get_ping_group_range_net(net, range, range+1);
213	low = make_kgid(&init_user_ns, range[0]);
214	high = make_kgid(&init_user_ns, range[1]);
215	if (!gid_valid(low) || !gid_valid(high) || gid_lt(high, low))
216		return -EACCES;
217
218	if (range[0] <= group && group <= range[1])
219		return 0;
220
221	for (i = 0; i < group_info->nblocks; i++) {
222		int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
 
223		for (j = 0; j < cp_count; j++) {
224			kgid_t gid = group_info->blocks[i][j];
225			if (gid_lte(low, gid) && gid_lte(gid, high))
226				return 0;
227		}
228
229		count -= cp_count;
230	}
231
232	return -EACCES;
233}
234
235static void ping_close(struct sock *sk, long timeout)
236{
237	pr_debug("ping_close(sk=%p,sk->num=%u)\n",
238		 inet_sk(sk), inet_sk(sk)->inet_num);
239	pr_debug("isk->refcnt = %d\n", sk->sk_refcnt.counter);
240
241	sk_common_release(sk);
242}
243
244/*
245 * We need our own bind because there are no privileged id's == local ports.
246 * Moreover, we don't allow binding to multi- and broadcast addresses.
247 */
248
249static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
250{
251	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
252	struct inet_sock *isk = inet_sk(sk);
253	unsigned short snum;
254	int chk_addr_ret;
255	int err;
256
257	if (addr_len < sizeof(struct sockaddr_in))
258		return -EINVAL;
259
260	pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n",
261		 sk, addr->sin_addr.s_addr, ntohs(addr->sin_port));
262
263	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
264	if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
265		chk_addr_ret = RTN_LOCAL;
266
267	if ((sysctl_ip_nonlocal_bind == 0 &&
268	    isk->freebind == 0 && isk->transparent == 0 &&
269	     chk_addr_ret != RTN_LOCAL) ||
270	    chk_addr_ret == RTN_MULTICAST ||
271	    chk_addr_ret == RTN_BROADCAST)
272		return -EADDRNOTAVAIL;
273
274	lock_sock(sk);
275
276	err = -EINVAL;
277	if (isk->inet_num != 0)
278		goto out;
279
280	err = -EADDRINUSE;
281	isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr;
282	snum = ntohs(addr->sin_port);
283	if (ping_v4_get_port(sk, snum) != 0) {
284		isk->inet_saddr = isk->inet_rcv_saddr = 0;
285		goto out;
286	}
287
288	pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n",
289		 (int)isk->inet_num,
290		 &isk->inet_rcv_saddr,
291		 (int)sk->sk_bound_dev_if);
292
293	err = 0;
294	if (isk->inet_rcv_saddr)
295		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
296	if (snum)
297		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
298	isk->inet_sport = htons(isk->inet_num);
299	isk->inet_daddr = 0;
300	isk->inet_dport = 0;
301	sk_dst_reset(sk);
302out:
303	release_sock(sk);
304	pr_debug("ping_v4_bind -> %d\n", err);
305	return err;
306}
307
308/*
309 * Is this a supported type of ICMP message?
310 */
311
/* Return 1 only for an ICMP Echo request with code 0, otherwise 0. */
static inline int ping_supported(int type, int code)
{
	return type == ICMP_ECHO && code == 0;
}
318
319/*
320 * This routine is called by the ICMP module when it gets some
321 * sort of error condition.
322 */
323
324static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
325
326void ping_err(struct sk_buff *skb, u32 info)
327{
328	struct iphdr *iph = (struct iphdr *)skb->data;
329	struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2));
330	struct inet_sock *inet_sock;
331	int type = icmph->type;
332	int code = icmph->code;
333	struct net *net = dev_net(skb->dev);
334	struct sock *sk;
335	int harderr;
336	int err;
337
338	/* We assume the packet has already been checked by icmp_unreach */
339
340	if (!ping_supported(icmph->type, icmph->code))
341		return;
342
343	pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type,
344		 code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
345
346	sk = ping_v4_lookup(net, iph->daddr, iph->saddr,
347			    ntohs(icmph->un.echo.id), skb->dev->ifindex);
348	if (sk == NULL) {
 
349		pr_debug("no socket, dropping\n");
350		return;	/* No socket for error */
351	}
352	pr_debug("err on socket %p\n", sk);
353
354	err = 0;
355	harderr = 0;
356	inet_sock = inet_sk(sk);
357
358	switch (type) {
359	default:
360	case ICMP_TIME_EXCEEDED:
361		err = EHOSTUNREACH;
362		break;
363	case ICMP_SOURCE_QUENCH:
364		/* This is not a real error but ping wants to see it.
365		 * Report it with some fake errno. */
366		err = EREMOTEIO;
367		break;
368	case ICMP_PARAMETERPROB:
369		err = EPROTO;
370		harderr = 1;
371		break;
372	case ICMP_DEST_UNREACH:
373		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
374			if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) {
375				err = EMSGSIZE;
376				harderr = 1;
377				break;
378			}
379			goto out;
380		}
381		err = EHOSTUNREACH;
382		if (code <= NR_ICMP_UNREACH) {
383			harderr = icmp_err_convert[code].fatal;
384			err = icmp_err_convert[code].errno;
385		}
386		break;
387	case ICMP_REDIRECT:
388		/* See ICMP_SOURCE_QUENCH */
389		err = EREMOTEIO;
390		break;
391	}
392
393	/*
394	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
395	 *	4.1.3.3.
396	 */
397	if (!inet_sock->recverr) {
398		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
399			goto out;
400	} else {
401		ip_icmp_error(sk, skb, err, 0 /* no remote port */,
402			 info, (u8 *)icmph);
403	}
404	sk->sk_err = err;
405	sk->sk_error_report(sk);
406out:
407	sock_put(sk);
408}
409
410/*
411 *	Copy and checksum an ICMP Echo packet from user space into a buffer.
412 */
413
414struct pingfakehdr {
415	struct icmphdr icmph;
416	struct iovec *iov;
417	__wsum wcheck;
418};
419
420static int ping_getfrag(void *from, char *to,
421			int offset, int fraglen, int odd, struct sk_buff *skb)
422{
423	struct pingfakehdr *pfh = (struct pingfakehdr *)from;
424
425	if (offset == 0) {
426		if (fraglen < sizeof(struct icmphdr))
427			BUG();
428		if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr),
429			    pfh->iov, 0, fraglen - sizeof(struct icmphdr),
430			    &pfh->wcheck))
431			return -EFAULT;
432
433		return 0;
434	}
435	if (offset < sizeof(struct icmphdr))
436		BUG();
437	if (csum_partial_copy_fromiovecend
438			(to, pfh->iov, offset - sizeof(struct icmphdr),
439			 fraglen, &pfh->wcheck))
440		return -EFAULT;
441	return 0;
442}
443
444static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
445				    struct flowi4 *fl4)
446{
447	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
448
449	pfh->wcheck = csum_partial((char *)&pfh->icmph,
450		sizeof(struct icmphdr), pfh->wcheck);
451	pfh->icmph.checksum = csum_fold(pfh->wcheck);
452	memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr));
453	skb->ip_summed = CHECKSUM_NONE;
454	return ip_push_pending_frames(sk, fl4);
455}
456
457static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
458			size_t len)
459{
460	struct net *net = sock_net(sk);
461	struct flowi4 fl4;
462	struct inet_sock *inet = inet_sk(sk);
463	struct ipcm_cookie ipc;
464	struct icmphdr user_icmph;
465	struct pingfakehdr pfh;
466	struct rtable *rt = NULL;
467	struct ip_options_data opt_copy;
468	int free = 0;
469	__be32 saddr, daddr, faddr;
470	u8  tos;
471	int err;
472
473	pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
474
475
476	if (len > 0xFFFF)
477		return -EMSGSIZE;
478
479	/*
480	 *	Check the flags.
481	 */
482
483	/* Mirror BSD error message compatibility */
484	if (msg->msg_flags & MSG_OOB)
485		return -EOPNOTSUPP;
486
487	/*
488	 *	Fetch the ICMP header provided by the userland.
489	 *	iovec is modified!
490	 */
491
492	if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov,
493			     sizeof(struct icmphdr)))
494		return -EFAULT;
495	if (!ping_supported(user_icmph.type, user_icmph.code))
496		return -EINVAL;
497
498	/*
499	 *	Get and verify the address.
500	 */
501
502	if (msg->msg_name) {
503		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
504		if (msg->msg_namelen < sizeof(*usin))
505			return -EINVAL;
506		if (usin->sin_family != AF_INET)
507			return -EINVAL;
508		daddr = usin->sin_addr.s_addr;
509		/* no remote port */
510	} else {
511		if (sk->sk_state != TCP_ESTABLISHED)
512			return -EDESTADDRREQ;
513		daddr = inet->inet_daddr;
514		/* no remote port */
515	}
516
517	ipc.addr = inet->inet_saddr;
518	ipc.opt = NULL;
519	ipc.oif = sk->sk_bound_dev_if;
520	ipc.tx_flags = 0;
521	err = sock_tx_timestamp(sk, &ipc.tx_flags);
522	if (err)
523		return err;
524
525	if (msg->msg_controllen) {
526		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
527		if (err)
528			return err;
529		if (ipc.opt)
530			free = 1;
531	}
532	if (!ipc.opt) {
533		struct ip_options_rcu *inet_opt;
534
535		rcu_read_lock();
536		inet_opt = rcu_dereference(inet->inet_opt);
537		if (inet_opt) {
538			memcpy(&opt_copy, inet_opt,
539			       sizeof(*inet_opt) + inet_opt->opt.optlen);
540			ipc.opt = &opt_copy.opt;
541		}
542		rcu_read_unlock();
543	}
544
545	saddr = ipc.addr;
546	ipc.addr = faddr = daddr;
547
548	if (ipc.opt && ipc.opt->opt.srr) {
549		if (!daddr)
550			return -EINVAL;
551		faddr = ipc.opt->opt.faddr;
552	}
553	tos = RT_TOS(inet->tos);
554	if (sock_flag(sk, SOCK_LOCALROUTE) ||
555	    (msg->msg_flags & MSG_DONTROUTE) ||
556	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
557		tos |= RTO_ONLINK;
558	}
559
560	if (ipv4_is_multicast(daddr)) {
561		if (!ipc.oif)
562			ipc.oif = inet->mc_index;
563		if (!saddr)
564			saddr = inet->mc_addr;
565	} else if (!ipc.oif)
566		ipc.oif = inet->uc_index;
567
568	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
569			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
570			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
571
572	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
573	rt = ip_route_output_flow(net, &fl4, sk);
574	if (IS_ERR(rt)) {
575		err = PTR_ERR(rt);
576		rt = NULL;
577		if (err == -ENETUNREACH)
578			IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
579		goto out;
580	}
581
582	err = -EACCES;
583	if ((rt->rt_flags & RTCF_BROADCAST) &&
584	    !sock_flag(sk, SOCK_BROADCAST))
585		goto out;
586
587	if (msg->msg_flags & MSG_CONFIRM)
588		goto do_confirm;
589back_from_confirm:
590
591	if (!ipc.addr)
592		ipc.addr = fl4.daddr;
593
594	lock_sock(sk);
595
596	pfh.icmph.type = user_icmph.type; /* already checked */
597	pfh.icmph.code = user_icmph.code; /* ditto */
598	pfh.icmph.checksum = 0;
599	pfh.icmph.un.echo.id = inet->inet_sport;
600	pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence;
601	pfh.iov = msg->msg_iov;
602	pfh.wcheck = 0;
603
604	err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len,
605			0, &ipc, &rt, msg->msg_flags);
606	if (err)
607		ip_flush_pending_frames(sk);
608	else
609		err = ping_push_pending_frames(sk, &pfh, &fl4);
610	release_sock(sk);
611
612out:
613	ip_rt_put(rt);
614	if (free)
615		kfree(ipc.opt);
616	if (!err) {
617		icmp_out_count(sock_net(sk), user_icmph.type);
618		return len;
619	}
620	return err;
621
622do_confirm:
623	dst_confirm(&rt->dst);
624	if (!(msg->msg_flags & MSG_PROBE) || len)
625		goto back_from_confirm;
626	err = 0;
627	goto out;
628}
629
630static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
631			size_t len, int noblock, int flags, int *addr_len)
632{
633	struct inet_sock *isk = inet_sk(sk);
634	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
635	struct sk_buff *skb;
636	int copied, err;
637
638	pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num);
639
640	err = -EOPNOTSUPP;
641	if (flags & MSG_OOB)
642		goto out;
643
644	if (addr_len)
645		*addr_len = sizeof(*sin);
646
647	if (flags & MSG_ERRQUEUE)
648		return ip_recv_error(sk, msg, len);
649
650	skb = skb_recv_datagram(sk, flags, noblock, &err);
651	if (!skb)
652		goto out;
653
654	copied = skb->len;
655	if (copied > len) {
656		msg->msg_flags |= MSG_TRUNC;
657		copied = len;
658	}
659
660	/* Don't bother checking the checksum */
661	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
662	if (err)
663		goto done;
664
665	sock_recv_timestamp(msg, sk, skb);
666
667	/* Copy the address. */
668	if (sin) {
669		sin->sin_family = AF_INET;
670		sin->sin_port = 0 /* skb->h.uh->source */;
671		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
672		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
673	}
674	if (isk->cmsg_flags)
675		ip_cmsg_recv(msg, skb);
676	err = copied;
677
678done:
679	skb_free_datagram(sk, skb);
680out:
681	pr_debug("ping_recvmsg -> %d\n", err);
682	return err;
683}
684
685static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
686{
687	pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n",
688		 inet_sk(sk), inet_sk(sk)->inet_num, skb);
689	if (sock_queue_rcv_skb(sk, skb) < 0) {
 
690		kfree_skb(skb);
691		pr_debug("ping_queue_rcv_skb -> failed\n");
692		return -1;
693	}
694	return 0;
695}
696
697
698/*
699 *	All we need to do is get the socket.
700 */
701
702void ping_rcv(struct sk_buff *skb)
703{
704	struct sock *sk;
705	struct net *net = dev_net(skb->dev);
706	struct iphdr *iph = ip_hdr(skb);
707	struct icmphdr *icmph = icmp_hdr(skb);
708	__be32 saddr = iph->saddr;
709	__be32 daddr = iph->daddr;
710
711	/* We assume the packet has already been checked by icmp_rcv */
712
713	pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n",
714		 skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence));
715
716	/* Push ICMP header back */
717	skb_push(skb, skb->data - (u8 *)icmph);
718
719	sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id),
720			    skb->dev->ifindex);
721	if (sk != NULL) {
722		pr_debug("rcv on socket %p\n", sk);
723		ping_queue_rcv_skb(sk, skb_get(skb));
724		sock_put(sk);
725		return;
726	}
727	pr_debug("no socket, dropping\n");
728
729	/* We're called from icmp_rcv(). kfree_skb() is done there. */
730}
731
732struct proto ping_prot = {
733	.name =		"PING",
734	.owner =	THIS_MODULE,
735	.init =		ping_init_sock,
736	.close =	ping_close,
737	.connect =	ip4_datagram_connect,
738	.disconnect =	udp_disconnect,
739	.setsockopt =	ip_setsockopt,
740	.getsockopt =	ip_getsockopt,
741	.sendmsg =	ping_sendmsg,
742	.recvmsg =	ping_recvmsg,
743	.bind =		ping_bind,
744	.backlog_rcv =	ping_queue_rcv_skb,
745	.hash =		ping_v4_hash,
746	.unhash =	ping_v4_unhash,
747	.get_port =	ping_v4_get_port,
748	.obj_size =	sizeof(struct inet_sock),
749};
750EXPORT_SYMBOL(ping_prot);
751
752#ifdef CONFIG_PROC_FS
753
754static struct sock *ping_get_first(struct seq_file *seq, int start)
755{
756	struct sock *sk;
757	struct ping_iter_state *state = seq->private;
758	struct net *net = seq_file_net(seq);
759
760	for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
761	     ++state->bucket) {
762		struct hlist_nulls_node *node;
763		struct hlist_nulls_head *hslot;
764
765		hslot = &ping_table.hash[state->bucket];
766
767		if (hlist_nulls_empty(hslot))
768			continue;
769
770		sk_nulls_for_each(sk, node, hslot) {
771			if (net_eq(sock_net(sk), net))
772				goto found;
773		}
774	}
775	sk = NULL;
776found:
777	return sk;
778}
779
780static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
781{
782	struct ping_iter_state *state = seq->private;
783	struct net *net = seq_file_net(seq);
784
785	do {
786		sk = sk_nulls_next(sk);
787	} while (sk && (!net_eq(sock_net(sk), net)));
788
789	if (!sk)
790		return ping_get_first(seq, state->bucket + 1);
791	return sk;
792}
793
794static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos)
795{
796	struct sock *sk = ping_get_first(seq, 0);
797
798	if (sk)
799		while (pos && (sk = ping_get_next(seq, sk)) != NULL)
800			--pos;
801	return pos ? NULL : sk;
802}
803
804static void *ping_seq_start(struct seq_file *seq, loff_t *pos)
805{
806	struct ping_iter_state *state = seq->private;
807	state->bucket = 0;
808
809	read_lock_bh(&ping_table.lock);
810
811	return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
812}
813
814static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos)
815{
816	struct sock *sk;
817
818	if (v == SEQ_START_TOKEN)
819		sk = ping_get_idx(seq, 0);
820	else
821		sk = ping_get_next(seq, v);
822
823	++*pos;
824	return sk;
825}
826
827static void ping_seq_stop(struct seq_file *seq, void *v)
828{
829	read_unlock_bh(&ping_table.lock);
830}
831
832static void ping_format_sock(struct sock *sp, struct seq_file *f,
833		int bucket, int *len)
834{
835	struct inet_sock *inet = inet_sk(sp);
836	__be32 dest = inet->inet_daddr;
837	__be32 src = inet->inet_rcv_saddr;
838	__u16 destp = ntohs(inet->inet_dport);
839	__u16 srcp = ntohs(inet->inet_sport);
840
841	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
842		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d%n",
843		bucket, src, srcp, dest, destp, sp->sk_state,
844		sk_wmem_alloc_get(sp),
845		sk_rmem_alloc_get(sp),
846		0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
847		atomic_read(&sp->sk_refcnt), sp,
848		atomic_read(&sp->sk_drops), len);
849}
850
851static int ping_seq_show(struct seq_file *seq, void *v)
852{
853	if (v == SEQ_START_TOKEN)
854		seq_printf(seq, "%-127s\n",
855			   "  sl  local_address rem_address   st tx_queue "
856			   "rx_queue tr tm->when retrnsmt   uid  timeout "
857			   "inode ref pointer drops");
858	else {
859		struct ping_iter_state *state = seq->private;
860		int len;
861
862		ping_format_sock(v, seq, state->bucket, &len);
863		seq_printf(seq, "%*s\n", 127 - len, "");
864	}
865	return 0;
866}
867
868static const struct seq_operations ping_seq_ops = {
869	.show		= ping_seq_show,
870	.start		= ping_seq_start,
871	.next		= ping_seq_next,
872	.stop		= ping_seq_stop,
873};
874
875static int ping_seq_open(struct inode *inode, struct file *file)
876{
877	return seq_open_net(inode, file, &ping_seq_ops,
878			   sizeof(struct ping_iter_state));
879}
880
881static const struct file_operations ping_seq_fops = {
882	.open		= ping_seq_open,
883	.read		= seq_read,
884	.llseek		= seq_lseek,
885	.release	= seq_release_net,
886};
887
888static int ping_proc_register(struct net *net)
889{
890	struct proc_dir_entry *p;
891	int rc = 0;
892
893	p = proc_net_fops_create(net, "icmp", S_IRUGO, &ping_seq_fops);
894	if (!p)
895		rc = -ENOMEM;
896	return rc;
897}
898
/* Remove the "icmp" proc entry of one network namespace. */
static void ping_proc_unregister(struct net *net)
{
	proc_net_remove(net, "icmp");
}
903
904
905static int __net_init ping_proc_init_net(struct net *net)
906{
907	return ping_proc_register(net);
908}
909
910static void __net_exit ping_proc_exit_net(struct net *net)
911{
912	ping_proc_unregister(net);
913}
914
915static struct pernet_operations ping_net_ops = {
916	.init = ping_proc_init_net,
917	.exit = ping_proc_exit_net,
918};
919
920int __init ping_proc_init(void)
921{
922	return register_pernet_subsys(&ping_net_ops);
923}
924
925void ping_proc_exit(void)
926{
927	unregister_pernet_subsys(&ping_net_ops);
928}
929
930#endif
931
932void __init ping_init(void)
933{
934	int i;
935
936	for (i = 0; i < PING_HTABLE_SIZE; i++)
937		INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
938	rwlock_init(&ping_table.lock);
939}