Linux Audio

Check our new training course

Loading...
v3.5.6
  1/*
  2 * net/sched/cls_flow.c		Generic flow classifier
  3 *
  4 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
  5 *
  6 * This program is free software; you can redistribute it and/or
  7 * modify it under the terms of the GNU General Public License
  8 * as published by the Free Software Foundation; either version 2
  9 * of the License, or (at your option) any later version.
 10 */
 11
 12#include <linux/kernel.h>
 13#include <linux/init.h>
 14#include <linux/list.h>
 15#include <linux/jhash.h>
 16#include <linux/random.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/skbuff.h>
 19#include <linux/in.h>
 20#include <linux/ip.h>
 21#include <linux/ipv6.h>
 22#include <linux/if_vlan.h>
 23#include <linux/slab.h>
 24#include <linux/module.h>
 25
 26#include <net/pkt_cls.h>
 27#include <net/ip.h>
 28#include <net/route.h>
 29#include <net/flow_keys.h>
 30
 31#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 32#include <net/netfilter/nf_conntrack.h>
 33#endif
 34
 35struct flow_head {
 36	struct list_head	filters;
 37};
 38
 39struct flow_filter {
 40	struct list_head	list;
 41	struct tcf_exts		exts;
 42	struct tcf_ematch_tree	ematches;
 43	struct timer_list	perturb_timer;
 44	u32			perturb_period;
 45	u32			handle;
 46
 47	u32			nkeys;
 48	u32			keymask;
 49	u32			mode;
 50	u32			mask;
 51	u32			xor;
 52	u32			rshift;
 53	u32			addend;
 54	u32			divisor;
 55	u32			baseclass;
 56	u32			hashrnd;
 57};
 58
 59static const struct tcf_ext_map flow_ext_map = {
 60	.action	= TCA_FLOW_ACT,
 61	.police	= TCA_FLOW_POLICE,
 62};
 63
 64static inline u32 addr_fold(void *addr)
 65{
 66	unsigned long a = (unsigned long)addr;
 67
 68	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 69}
 70
 71static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 72{
 73	if (flow->src)
 74		return ntohl(flow->src);
 75	return addr_fold(skb->sk);
 76}
 77
 78static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 79{
 80	if (flow->dst)
 81		return ntohl(flow->dst);
 82	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 83}
 84
 85static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 86{
 87	return flow->ip_proto;
 88}
 89
 90static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 91{
 92	if (flow->ports)
 93		return ntohs(flow->port16[0]);
 94
 95	return addr_fold(skb->sk);
 96}
 97
 98static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 99{
100	if (flow->ports)
101		return ntohs(flow->port16[1]);
102
103	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
104}
105
106static u32 flow_get_iif(const struct sk_buff *skb)
107{
108	return skb->skb_iif;
109}
110
111static u32 flow_get_priority(const struct sk_buff *skb)
112{
113	return skb->priority;
114}
115
116static u32 flow_get_mark(const struct sk_buff *skb)
117{
118	return skb->mark;
119}
120
121static u32 flow_get_nfct(const struct sk_buff *skb)
122{
123#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
124	return addr_fold(skb->nfct);
125#else
126	return 0;
127#endif
128}
129
/*
 * CTTUPLE(skb, member): evaluates to "member" of the conntrack tuple selected
 * by CTINFO2DIR(ctinfo) for this skb.  Jumps to the enclosing function's
 * "fallback" label when nf_ct_get() returns NULL, or unconditionally when
 * conntrack is not configured, so every user must provide that label.
 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info __ctinfo;				\
	const struct nf_conn *__ct = nf_ct_get(skb, &__ctinfo);		\
	if (!__ct)							\
		goto fallback;						\
	__ct->tuplehash[CTINFO2DIR(__ctinfo)].tuple.member;		\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif
146
147static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
148{
149	switch (skb->protocol) {
150	case htons(ETH_P_IP):
151		return ntohl(CTTUPLE(skb, src.u3.ip));
152	case htons(ETH_P_IPV6):
153		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
154	}
155fallback:
156	return flow_get_src(skb, flow);
157}
158
159static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
160{
161	switch (skb->protocol) {
162	case htons(ETH_P_IP):
163		return ntohl(CTTUPLE(skb, dst.u3.ip));
164	case htons(ETH_P_IPV6):
165		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
166	}
167fallback:
168	return flow_get_dst(skb, flow);
169}
170
171static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
172{
173	return ntohs(CTTUPLE(skb, src.u.all));
174fallback:
175	return flow_get_proto_src(skb, flow);
176}
177
178static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
179{
180	return ntohs(CTTUPLE(skb, dst.u.all));
181fallback:
182	return flow_get_proto_dst(skb, flow);
183}
184
185static u32 flow_get_rtclassid(const struct sk_buff *skb)
186{
187#ifdef CONFIG_IP_ROUTE_CLASSID
188	if (skb_dst(skb))
189		return skb_dst(skb)->tclassid;
190#endif
191	return 0;
192}
193
194static u32 flow_get_skuid(const struct sk_buff *skb)
195{
196	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
197		return skb->sk->sk_socket->file->f_cred->fsuid;
 
 
198	return 0;
199}
200
201static u32 flow_get_skgid(const struct sk_buff *skb)
202{
203	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
204		return skb->sk->sk_socket->file->f_cred->fsgid;
 
 
205	return 0;
206}
207
208static u32 flow_get_vlan_tag(const struct sk_buff *skb)
209{
210	u16 uninitialized_var(tag);
211
212	if (vlan_get_tag(skb, &tag) < 0)
213		return 0;
214	return tag & VLAN_VID_MASK;
215}
216
217static u32 flow_get_rxhash(struct sk_buff *skb)
218{
219	return skb_get_rxhash(skb);
220}
221
222static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
223{
224	switch (key) {
225	case FLOW_KEY_SRC:
226		return flow_get_src(skb, flow);
227	case FLOW_KEY_DST:
228		return flow_get_dst(skb, flow);
229	case FLOW_KEY_PROTO:
230		return flow_get_proto(skb, flow);
231	case FLOW_KEY_PROTO_SRC:
232		return flow_get_proto_src(skb, flow);
233	case FLOW_KEY_PROTO_DST:
234		return flow_get_proto_dst(skb, flow);
235	case FLOW_KEY_IIF:
236		return flow_get_iif(skb);
237	case FLOW_KEY_PRIORITY:
238		return flow_get_priority(skb);
239	case FLOW_KEY_MARK:
240		return flow_get_mark(skb);
241	case FLOW_KEY_NFCT:
242		return flow_get_nfct(skb);
243	case FLOW_KEY_NFCT_SRC:
244		return flow_get_nfct_src(skb, flow);
245	case FLOW_KEY_NFCT_DST:
246		return flow_get_nfct_dst(skb, flow);
247	case FLOW_KEY_NFCT_PROTO_SRC:
248		return flow_get_nfct_proto_src(skb, flow);
249	case FLOW_KEY_NFCT_PROTO_DST:
250		return flow_get_nfct_proto_dst(skb, flow);
251	case FLOW_KEY_RTCLASSID:
252		return flow_get_rtclassid(skb);
253	case FLOW_KEY_SKUID:
254		return flow_get_skuid(skb);
255	case FLOW_KEY_SKGID:
256		return flow_get_skgid(skb);
257	case FLOW_KEY_VLAN_TAG:
258		return flow_get_vlan_tag(skb);
259	case FLOW_KEY_RXHASH:
260		return flow_get_rxhash(skb);
261	default:
262		WARN_ON(1);
263		return 0;
264	}
265}
266
/*
 * Keys whose getters take the dissected flow_keys; flow_classify() runs
 * skb_flow_dissect() only when a filter's keymask contains one of them.
 */
#define FLOW_KEYS_NEEDED						\
	(/* keys read straight from the dissected flow */		\
	 (1 << FLOW_KEY_SRC)		|				\
	 (1 << FLOW_KEY_DST)		|				\
	 (1 << FLOW_KEY_PROTO)		|				\
	 (1 << FLOW_KEY_PROTO_SRC)	|				\
	 (1 << FLOW_KEY_PROTO_DST)	|				\
	 /* conntrack keys that may fall back to the above */		\
	 (1 << FLOW_KEY_NFCT_SRC)	|				\
	 (1 << FLOW_KEY_NFCT_DST)	|				\
	 (1 << FLOW_KEY_NFCT_PROTO_SRC)	|				\
	 (1 << FLOW_KEY_NFCT_PROTO_DST))
276
277static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
278			 struct tcf_result *res)
279{
280	struct flow_head *head = tp->root;
281	struct flow_filter *f;
282	u32 keymask;
283	u32 classid;
284	unsigned int n, key;
285	int r;
286
287	list_for_each_entry(f, &head->filters, list) {
288		u32 keys[FLOW_KEY_MAX + 1];
289		struct flow_keys flow_keys;
290
291		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
292			continue;
293
294		keymask = f->keymask;
295		if (keymask & FLOW_KEYS_NEEDED)
296			skb_flow_dissect(skb, &flow_keys);
297
298		for (n = 0; n < f->nkeys; n++) {
299			key = ffs(keymask) - 1;
300			keymask &= ~(1 << key);
301			keys[n] = flow_key_get(skb, key, &flow_keys);
302		}
303
304		if (f->mode == FLOW_MODE_HASH)
305			classid = jhash2(keys, f->nkeys, f->hashrnd);
306		else {
307			classid = keys[0];
308			classid = (classid & f->mask) ^ f->xor;
309			classid = (classid >> f->rshift) + f->addend;
310		}
311
312		if (f->divisor)
313			classid %= f->divisor;
314
315		res->class   = 0;
316		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
317
318		r = tcf_exts_exec(skb, &f->exts, res);
319		if (r < 0)
320			continue;
321		return r;
322	}
323	return -1;
324}
325
326static void flow_perturbation(unsigned long arg)
327{
328	struct flow_filter *f = (struct flow_filter *)arg;
329
330	get_random_bytes(&f->hashrnd, 4);
331	if (f->perturb_period)
332		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
333}
334
335static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
336	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
337	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
338	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
339	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
340	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
341	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
342	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
343	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
344	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
345	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
346	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
347	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
348};
349
350static int flow_change(struct tcf_proto *tp, unsigned long base,
 
351		       u32 handle, struct nlattr **tca,
352		       unsigned long *arg)
353{
354	struct flow_head *head = tp->root;
355	struct flow_filter *f;
356	struct nlattr *opt = tca[TCA_OPTIONS];
357	struct nlattr *tb[TCA_FLOW_MAX + 1];
358	struct tcf_exts e;
359	struct tcf_ematch_tree t;
360	unsigned int nkeys = 0;
361	unsigned int perturb_period = 0;
362	u32 baseclass = 0;
363	u32 keymask = 0;
364	u32 mode;
365	int err;
366
367	if (opt == NULL)
368		return -EINVAL;
369
370	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
371	if (err < 0)
372		return err;
373
374	if (tb[TCA_FLOW_BASECLASS]) {
375		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
376		if (TC_H_MIN(baseclass) == 0)
377			return -EINVAL;
378	}
379
380	if (tb[TCA_FLOW_KEYS]) {
381		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
382
383		nkeys = hweight32(keymask);
384		if (nkeys == 0)
385			return -EINVAL;
386
387		if (fls(keymask) - 1 > FLOW_KEY_MAX)
388			return -EOPNOTSUPP;
 
 
 
 
389	}
390
391	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map);
 
392	if (err < 0)
393		return err;
394
395	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
396	if (err < 0)
397		goto err1;
398
399	f = (struct flow_filter *)*arg;
400	if (f != NULL) {
401		err = -EINVAL;
402		if (f->handle != handle && handle)
403			goto err2;
404
405		mode = f->mode;
406		if (tb[TCA_FLOW_MODE])
407			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
408		if (mode != FLOW_MODE_HASH && nkeys > 1)
409			goto err2;
410
411		if (mode == FLOW_MODE_HASH)
412			perturb_period = f->perturb_period;
413		if (tb[TCA_FLOW_PERTURB]) {
414			if (mode != FLOW_MODE_HASH)
415				goto err2;
416			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
417		}
418	} else {
419		err = -EINVAL;
420		if (!handle)
421			goto err2;
422		if (!tb[TCA_FLOW_KEYS])
423			goto err2;
424
425		mode = FLOW_MODE_MAP;
426		if (tb[TCA_FLOW_MODE])
427			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
428		if (mode != FLOW_MODE_HASH && nkeys > 1)
429			goto err2;
430
431		if (tb[TCA_FLOW_PERTURB]) {
432			if (mode != FLOW_MODE_HASH)
433				goto err2;
434			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
435		}
436
437		if (TC_H_MAJ(baseclass) == 0)
438			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
439		if (TC_H_MIN(baseclass) == 0)
440			baseclass = TC_H_MAKE(baseclass, 1);
441
442		err = -ENOBUFS;
443		f = kzalloc(sizeof(*f), GFP_KERNEL);
444		if (f == NULL)
445			goto err2;
446
447		f->handle = handle;
448		f->mask	  = ~0U;
 
449
450		get_random_bytes(&f->hashrnd, 4);
451		f->perturb_timer.function = flow_perturbation;
452		f->perturb_timer.data = (unsigned long)f;
453		init_timer_deferrable(&f->perturb_timer);
454	}
455
456	tcf_exts_change(tp, &f->exts, &e);
457	tcf_em_tree_change(tp, &f->ematches, &t);
458
459	tcf_tree_lock(tp);
460
461	if (tb[TCA_FLOW_KEYS]) {
462		f->keymask = keymask;
463		f->nkeys   = nkeys;
464	}
465
466	f->mode = mode;
467
468	if (tb[TCA_FLOW_MASK])
469		f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
470	if (tb[TCA_FLOW_XOR])
471		f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
472	if (tb[TCA_FLOW_RSHIFT])
473		f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
474	if (tb[TCA_FLOW_ADDEND])
475		f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
476
477	if (tb[TCA_FLOW_DIVISOR])
478		f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
479	if (baseclass)
480		f->baseclass = baseclass;
481
482	f->perturb_period = perturb_period;
483	del_timer(&f->perturb_timer);
484	if (perturb_period)
485		mod_timer(&f->perturb_timer, jiffies + perturb_period);
486
487	if (*arg == 0)
488		list_add_tail(&f->list, &head->filters);
489
490	tcf_tree_unlock(tp);
491
492	*arg = (unsigned long)f;
493	return 0;
494
495err2:
496	tcf_em_tree_destroy(tp, &t);
497err1:
498	tcf_exts_destroy(tp, &e);
499	return err;
500}
501
502static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
503{
504	del_timer_sync(&f->perturb_timer);
505	tcf_exts_destroy(tp, &f->exts);
506	tcf_em_tree_destroy(tp, &f->ematches);
507	kfree(f);
508}
509
510static int flow_delete(struct tcf_proto *tp, unsigned long arg)
511{
512	struct flow_filter *f = (struct flow_filter *)arg;
513
514	tcf_tree_lock(tp);
515	list_del(&f->list);
516	tcf_tree_unlock(tp);
517	flow_destroy_filter(tp, f);
518	return 0;
519}
520
521static int flow_init(struct tcf_proto *tp)
522{
523	struct flow_head *head;
524
525	head = kzalloc(sizeof(*head), GFP_KERNEL);
526	if (head == NULL)
527		return -ENOBUFS;
528	INIT_LIST_HEAD(&head->filters);
529	tp->root = head;
530	return 0;
531}
532
533static void flow_destroy(struct tcf_proto *tp)
534{
535	struct flow_head *head = tp->root;
536	struct flow_filter *f, *next;
537
538	list_for_each_entry_safe(f, next, &head->filters, list) {
539		list_del(&f->list);
540		flow_destroy_filter(tp, f);
541	}
542	kfree(head);
543}
544
545static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
546{
547	struct flow_head *head = tp->root;
548	struct flow_filter *f;
549
550	list_for_each_entry(f, &head->filters, list)
551		if (f->handle == handle)
552			return (unsigned long)f;
553	return 0;
554}
555
/* Counterpart of flow_get(); intentionally does nothing. */
static void flow_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
559
560static int flow_dump(struct tcf_proto *tp, unsigned long fh,
561		     struct sk_buff *skb, struct tcmsg *t)
562{
563	struct flow_filter *f = (struct flow_filter *)fh;
564	struct nlattr *nest;
565
566	if (f == NULL)
567		return skb->len;
568
569	t->tcm_handle = f->handle;
570
571	nest = nla_nest_start(skb, TCA_OPTIONS);
572	if (nest == NULL)
573		goto nla_put_failure;
574
575	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
576	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
577		goto nla_put_failure;
578
579	if (f->mask != ~0 || f->xor != 0) {
580		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
581		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
582			goto nla_put_failure;
583	}
584	if (f->rshift &&
585	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
586		goto nla_put_failure;
587	if (f->addend &&
588	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
589		goto nla_put_failure;
590
591	if (f->divisor &&
592	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
593		goto nla_put_failure;
594	if (f->baseclass &&
595	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
596		goto nla_put_failure;
597
598	if (f->perturb_period &&
599	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
600		goto nla_put_failure;
601
602	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
603		goto nla_put_failure;
604#ifdef CONFIG_NET_EMATCH
605	if (f->ematches.hdr.nmatches &&
606	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
607		goto nla_put_failure;
608#endif
609	nla_nest_end(skb, nest);
610
611	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
612		goto nla_put_failure;
613
614	return skb->len;
615
616nla_put_failure:
617	nlmsg_trim(skb, nest);
618	return -1;
619}
620
621static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
622{
623	struct flow_head *head = tp->root;
624	struct flow_filter *f;
625
626	list_for_each_entry(f, &head->filters, list) {
627		if (arg->count < arg->skip)
628			goto skip;
629		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
630			arg->stop = 1;
631			break;
632		}
633skip:
634		arg->count++;
635	}
636}
637
638static struct tcf_proto_ops cls_flow_ops __read_mostly = {
639	.kind		= "flow",
640	.classify	= flow_classify,
641	.init		= flow_init,
642	.destroy	= flow_destroy,
643	.change		= flow_change,
644	.delete		= flow_delete,
645	.get		= flow_get,
646	.put		= flow_put,
647	.dump		= flow_dump,
648	.walk		= flow_walk,
649	.owner		= THIS_MODULE,
650};
651
652static int __init cls_flow_init(void)
653{
654	return register_tcf_proto_ops(&cls_flow_ops);
655}
656
657static void __exit cls_flow_exit(void)
658{
659	unregister_tcf_proto_ops(&cls_flow_ops);
660}
661
662module_init(cls_flow_init);
663module_exit(cls_flow_exit);
664
665MODULE_LICENSE("GPL");
666MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
667MODULE_DESCRIPTION("TC flow classifier");
v3.15
  1/*
  2 * net/sched/cls_flow.c		Generic flow classifier
  3 *
  4 * Copyright (c) 2007, 2008 Patrick McHardy <kaber@trash.net>
  5 *
  6 * This program is free software; you can redistribute it and/or
  7 * modify it under the terms of the GNU General Public License
  8 * as published by the Free Software Foundation; either version 2
  9 * of the License, or (at your option) any later version.
 10 */
 11
 12#include <linux/kernel.h>
 13#include <linux/init.h>
 14#include <linux/list.h>
 15#include <linux/jhash.h>
 16#include <linux/random.h>
 17#include <linux/pkt_cls.h>
 18#include <linux/skbuff.h>
 19#include <linux/in.h>
 20#include <linux/ip.h>
 21#include <linux/ipv6.h>
 22#include <linux/if_vlan.h>
 23#include <linux/slab.h>
 24#include <linux/module.h>
 25
 26#include <net/pkt_cls.h>
 27#include <net/ip.h>
 28#include <net/route.h>
 29#include <net/flow_keys.h>
 30
 31#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 32#include <net/netfilter/nf_conntrack.h>
 33#endif
 34
 35struct flow_head {
 36	struct list_head	filters;
 37};
 38
 39struct flow_filter {
 40	struct list_head	list;
 41	struct tcf_exts		exts;
 42	struct tcf_ematch_tree	ematches;
 43	struct timer_list	perturb_timer;
 44	u32			perturb_period;
 45	u32			handle;
 46
 47	u32			nkeys;
 48	u32			keymask;
 49	u32			mode;
 50	u32			mask;
 51	u32			xor;
 52	u32			rshift;
 53	u32			addend;
 54	u32			divisor;
 55	u32			baseclass;
 56	u32			hashrnd;
 57};
 58
 
 
 
 
 
 59static inline u32 addr_fold(void *addr)
 60{
 61	unsigned long a = (unsigned long)addr;
 62
 63	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 64}
 65
 66static u32 flow_get_src(const struct sk_buff *skb, const struct flow_keys *flow)
 67{
 68	if (flow->src)
 69		return ntohl(flow->src);
 70	return addr_fold(skb->sk);
 71}
 72
 73static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 74{
 75	if (flow->dst)
 76		return ntohl(flow->dst);
 77	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 78}
 79
 80static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
 81{
 82	return flow->ip_proto;
 83}
 84
 85static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
 86{
 87	if (flow->ports)
 88		return ntohs(flow->port16[0]);
 89
 90	return addr_fold(skb->sk);
 91}
 92
 93static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
 94{
 95	if (flow->ports)
 96		return ntohs(flow->port16[1]);
 97
 98	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 99}
100
101static u32 flow_get_iif(const struct sk_buff *skb)
102{
103	return skb->skb_iif;
104}
105
106static u32 flow_get_priority(const struct sk_buff *skb)
107{
108	return skb->priority;
109}
110
111static u32 flow_get_mark(const struct sk_buff *skb)
112{
113	return skb->mark;
114}
115
116static u32 flow_get_nfct(const struct sk_buff *skb)
117{
118#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
119	return addr_fold(skb->nfct);
120#else
121	return 0;
122#endif
123}
124
/*
 * CTTUPLE(skb, member): evaluates to "member" of the conntrack tuple selected
 * by CTINFO2DIR(ctinfo) for this skb.  Jumps to the enclosing function's
 * "fallback" label when nf_ct_get() returns NULL, or unconditionally when
 * conntrack is not configured, so every user must provide that label.
 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define CTTUPLE(skb, member)						\
({									\
	enum ip_conntrack_info __ctinfo;				\
	const struct nf_conn *__ct = nf_ct_get(skb, &__ctinfo);		\
	if (!__ct)							\
		goto fallback;						\
	__ct->tuplehash[CTINFO2DIR(__ctinfo)].tuple.member;		\
})
#else
#define CTTUPLE(skb, member)						\
({									\
	goto fallback;							\
	0;								\
})
#endif
141
142static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
143{
144	switch (skb->protocol) {
145	case htons(ETH_P_IP):
146		return ntohl(CTTUPLE(skb, src.u3.ip));
147	case htons(ETH_P_IPV6):
148		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
149	}
150fallback:
151	return flow_get_src(skb, flow);
152}
153
154static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
155{
156	switch (skb->protocol) {
157	case htons(ETH_P_IP):
158		return ntohl(CTTUPLE(skb, dst.u3.ip));
159	case htons(ETH_P_IPV6):
160		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
161	}
162fallback:
163	return flow_get_dst(skb, flow);
164}
165
166static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
167{
168	return ntohs(CTTUPLE(skb, src.u.all));
169fallback:
170	return flow_get_proto_src(skb, flow);
171}
172
173static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
174{
175	return ntohs(CTTUPLE(skb, dst.u.all));
176fallback:
177	return flow_get_proto_dst(skb, flow);
178}
179
180static u32 flow_get_rtclassid(const struct sk_buff *skb)
181{
182#ifdef CONFIG_IP_ROUTE_CLASSID
183	if (skb_dst(skb))
184		return skb_dst(skb)->tclassid;
185#endif
186	return 0;
187}
188
189static u32 flow_get_skuid(const struct sk_buff *skb)
190{
191	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
192		kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid;
193		return from_kuid(&init_user_ns, skuid);
194	}
195	return 0;
196}
197
198static u32 flow_get_skgid(const struct sk_buff *skb)
199{
200	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) {
201		kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid;
202		return from_kgid(&init_user_ns, skgid);
203	}
204	return 0;
205}
206
207static u32 flow_get_vlan_tag(const struct sk_buff *skb)
208{
209	u16 uninitialized_var(tag);
210
211	if (vlan_get_tag(skb, &tag) < 0)
212		return 0;
213	return tag & VLAN_VID_MASK;
214}
215
216static u32 flow_get_rxhash(struct sk_buff *skb)
217{
218	return skb_get_hash(skb);
219}
220
221static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow)
222{
223	switch (key) {
224	case FLOW_KEY_SRC:
225		return flow_get_src(skb, flow);
226	case FLOW_KEY_DST:
227		return flow_get_dst(skb, flow);
228	case FLOW_KEY_PROTO:
229		return flow_get_proto(skb, flow);
230	case FLOW_KEY_PROTO_SRC:
231		return flow_get_proto_src(skb, flow);
232	case FLOW_KEY_PROTO_DST:
233		return flow_get_proto_dst(skb, flow);
234	case FLOW_KEY_IIF:
235		return flow_get_iif(skb);
236	case FLOW_KEY_PRIORITY:
237		return flow_get_priority(skb);
238	case FLOW_KEY_MARK:
239		return flow_get_mark(skb);
240	case FLOW_KEY_NFCT:
241		return flow_get_nfct(skb);
242	case FLOW_KEY_NFCT_SRC:
243		return flow_get_nfct_src(skb, flow);
244	case FLOW_KEY_NFCT_DST:
245		return flow_get_nfct_dst(skb, flow);
246	case FLOW_KEY_NFCT_PROTO_SRC:
247		return flow_get_nfct_proto_src(skb, flow);
248	case FLOW_KEY_NFCT_PROTO_DST:
249		return flow_get_nfct_proto_dst(skb, flow);
250	case FLOW_KEY_RTCLASSID:
251		return flow_get_rtclassid(skb);
252	case FLOW_KEY_SKUID:
253		return flow_get_skuid(skb);
254	case FLOW_KEY_SKGID:
255		return flow_get_skgid(skb);
256	case FLOW_KEY_VLAN_TAG:
257		return flow_get_vlan_tag(skb);
258	case FLOW_KEY_RXHASH:
259		return flow_get_rxhash(skb);
260	default:
261		WARN_ON(1);
262		return 0;
263	}
264}
265
/*
 * Keys whose getters take the dissected flow_keys; flow_classify() runs
 * skb_flow_dissect() only when a filter's keymask contains one of them.
 */
#define FLOW_KEYS_NEEDED						\
	(/* keys read straight from the dissected flow */		\
	 (1 << FLOW_KEY_SRC)		|				\
	 (1 << FLOW_KEY_DST)		|				\
	 (1 << FLOW_KEY_PROTO)		|				\
	 (1 << FLOW_KEY_PROTO_SRC)	|				\
	 (1 << FLOW_KEY_PROTO_DST)	|				\
	 /* conntrack keys that may fall back to the above */		\
	 (1 << FLOW_KEY_NFCT_SRC)	|				\
	 (1 << FLOW_KEY_NFCT_DST)	|				\
	 (1 << FLOW_KEY_NFCT_PROTO_SRC)	|				\
	 (1 << FLOW_KEY_NFCT_PROTO_DST))
275
276static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
277			 struct tcf_result *res)
278{
279	struct flow_head *head = tp->root;
280	struct flow_filter *f;
281	u32 keymask;
282	u32 classid;
283	unsigned int n, key;
284	int r;
285
286	list_for_each_entry(f, &head->filters, list) {
287		u32 keys[FLOW_KEY_MAX + 1];
288		struct flow_keys flow_keys;
289
290		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
291			continue;
292
293		keymask = f->keymask;
294		if (keymask & FLOW_KEYS_NEEDED)
295			skb_flow_dissect(skb, &flow_keys);
296
297		for (n = 0; n < f->nkeys; n++) {
298			key = ffs(keymask) - 1;
299			keymask &= ~(1 << key);
300			keys[n] = flow_key_get(skb, key, &flow_keys);
301		}
302
303		if (f->mode == FLOW_MODE_HASH)
304			classid = jhash2(keys, f->nkeys, f->hashrnd);
305		else {
306			classid = keys[0];
307			classid = (classid & f->mask) ^ f->xor;
308			classid = (classid >> f->rshift) + f->addend;
309		}
310
311		if (f->divisor)
312			classid %= f->divisor;
313
314		res->class   = 0;
315		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
316
317		r = tcf_exts_exec(skb, &f->exts, res);
318		if (r < 0)
319			continue;
320		return r;
321	}
322	return -1;
323}
324
325static void flow_perturbation(unsigned long arg)
326{
327	struct flow_filter *f = (struct flow_filter *)arg;
328
329	get_random_bytes(&f->hashrnd, 4);
330	if (f->perturb_period)
331		mod_timer(&f->perturb_timer, jiffies + f->perturb_period);
332}
333
334static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
335	[TCA_FLOW_KEYS]		= { .type = NLA_U32 },
336	[TCA_FLOW_MODE]		= { .type = NLA_U32 },
337	[TCA_FLOW_BASECLASS]	= { .type = NLA_U32 },
338	[TCA_FLOW_RSHIFT]	= { .type = NLA_U32 },
339	[TCA_FLOW_ADDEND]	= { .type = NLA_U32 },
340	[TCA_FLOW_MASK]		= { .type = NLA_U32 },
341	[TCA_FLOW_XOR]		= { .type = NLA_U32 },
342	[TCA_FLOW_DIVISOR]	= { .type = NLA_U32 },
343	[TCA_FLOW_ACT]		= { .type = NLA_NESTED },
344	[TCA_FLOW_POLICE]	= { .type = NLA_NESTED },
345	[TCA_FLOW_EMATCHES]	= { .type = NLA_NESTED },
346	[TCA_FLOW_PERTURB]	= { .type = NLA_U32 },
347};
348
349static int flow_change(struct net *net, struct sk_buff *in_skb,
350		       struct tcf_proto *tp, unsigned long base,
351		       u32 handle, struct nlattr **tca,
352		       unsigned long *arg)
353{
354	struct flow_head *head = tp->root;
355	struct flow_filter *f;
356	struct nlattr *opt = tca[TCA_OPTIONS];
357	struct nlattr *tb[TCA_FLOW_MAX + 1];
358	struct tcf_exts e;
359	struct tcf_ematch_tree t;
360	unsigned int nkeys = 0;
361	unsigned int perturb_period = 0;
362	u32 baseclass = 0;
363	u32 keymask = 0;
364	u32 mode;
365	int err;
366
367	if (opt == NULL)
368		return -EINVAL;
369
370	err = nla_parse_nested(tb, TCA_FLOW_MAX, opt, flow_policy);
371	if (err < 0)
372		return err;
373
374	if (tb[TCA_FLOW_BASECLASS]) {
375		baseclass = nla_get_u32(tb[TCA_FLOW_BASECLASS]);
376		if (TC_H_MIN(baseclass) == 0)
377			return -EINVAL;
378	}
379
380	if (tb[TCA_FLOW_KEYS]) {
381		keymask = nla_get_u32(tb[TCA_FLOW_KEYS]);
382
383		nkeys = hweight32(keymask);
384		if (nkeys == 0)
385			return -EINVAL;
386
387		if (fls(keymask) - 1 > FLOW_KEY_MAX)
388			return -EOPNOTSUPP;
389
390		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
391		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
392			return -EOPNOTSUPP;
393	}
394
395	tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
396	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e);
397	if (err < 0)
398		return err;
399
400	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
401	if (err < 0)
402		goto err1;
403
404	f = (struct flow_filter *)*arg;
405	if (f != NULL) {
406		err = -EINVAL;
407		if (f->handle != handle && handle)
408			goto err2;
409
410		mode = f->mode;
411		if (tb[TCA_FLOW_MODE])
412			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
413		if (mode != FLOW_MODE_HASH && nkeys > 1)
414			goto err2;
415
416		if (mode == FLOW_MODE_HASH)
417			perturb_period = f->perturb_period;
418		if (tb[TCA_FLOW_PERTURB]) {
419			if (mode != FLOW_MODE_HASH)
420				goto err2;
421			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
422		}
423	} else {
424		err = -EINVAL;
425		if (!handle)
426			goto err2;
427		if (!tb[TCA_FLOW_KEYS])
428			goto err2;
429
430		mode = FLOW_MODE_MAP;
431		if (tb[TCA_FLOW_MODE])
432			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
433		if (mode != FLOW_MODE_HASH && nkeys > 1)
434			goto err2;
435
436		if (tb[TCA_FLOW_PERTURB]) {
437			if (mode != FLOW_MODE_HASH)
438				goto err2;
439			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
440		}
441
442		if (TC_H_MAJ(baseclass) == 0)
443			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
444		if (TC_H_MIN(baseclass) == 0)
445			baseclass = TC_H_MAKE(baseclass, 1);
446
447		err = -ENOBUFS;
448		f = kzalloc(sizeof(*f), GFP_KERNEL);
449		if (f == NULL)
450			goto err2;
451
452		f->handle = handle;
453		f->mask	  = ~0U;
454		tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
455
456		get_random_bytes(&f->hashrnd, 4);
457		f->perturb_timer.function = flow_perturbation;
458		f->perturb_timer.data = (unsigned long)f;
459		init_timer_deferrable(&f->perturb_timer);
460	}
461
462	tcf_exts_change(tp, &f->exts, &e);
463	tcf_em_tree_change(tp, &f->ematches, &t);
464
465	tcf_tree_lock(tp);
466
467	if (tb[TCA_FLOW_KEYS]) {
468		f->keymask = keymask;
469		f->nkeys   = nkeys;
470	}
471
472	f->mode = mode;
473
474	if (tb[TCA_FLOW_MASK])
475		f->mask = nla_get_u32(tb[TCA_FLOW_MASK]);
476	if (tb[TCA_FLOW_XOR])
477		f->xor = nla_get_u32(tb[TCA_FLOW_XOR]);
478	if (tb[TCA_FLOW_RSHIFT])
479		f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]);
480	if (tb[TCA_FLOW_ADDEND])
481		f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]);
482
483	if (tb[TCA_FLOW_DIVISOR])
484		f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]);
485	if (baseclass)
486		f->baseclass = baseclass;
487
488	f->perturb_period = perturb_period;
489	del_timer(&f->perturb_timer);
490	if (perturb_period)
491		mod_timer(&f->perturb_timer, jiffies + perturb_period);
492
493	if (*arg == 0)
494		list_add_tail(&f->list, &head->filters);
495
496	tcf_tree_unlock(tp);
497
498	*arg = (unsigned long)f;
499	return 0;
500
501err2:
502	tcf_em_tree_destroy(tp, &t);
503err1:
504	tcf_exts_destroy(tp, &e);
505	return err;
506}
507
508static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
509{
510	del_timer_sync(&f->perturb_timer);
511	tcf_exts_destroy(tp, &f->exts);
512	tcf_em_tree_destroy(tp, &f->ematches);
513	kfree(f);
514}
515
516static int flow_delete(struct tcf_proto *tp, unsigned long arg)
517{
518	struct flow_filter *f = (struct flow_filter *)arg;
519
520	tcf_tree_lock(tp);
521	list_del(&f->list);
522	tcf_tree_unlock(tp);
523	flow_destroy_filter(tp, f);
524	return 0;
525}
526
527static int flow_init(struct tcf_proto *tp)
528{
529	struct flow_head *head;
530
531	head = kzalloc(sizeof(*head), GFP_KERNEL);
532	if (head == NULL)
533		return -ENOBUFS;
534	INIT_LIST_HEAD(&head->filters);
535	tp->root = head;
536	return 0;
537}
538
539static void flow_destroy(struct tcf_proto *tp)
540{
541	struct flow_head *head = tp->root;
542	struct flow_filter *f, *next;
543
544	list_for_each_entry_safe(f, next, &head->filters, list) {
545		list_del(&f->list);
546		flow_destroy_filter(tp, f);
547	}
548	kfree(head);
549}
550
551static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
552{
553	struct flow_head *head = tp->root;
554	struct flow_filter *f;
555
556	list_for_each_entry(f, &head->filters, list)
557		if (f->handle == handle)
558			return (unsigned long)f;
559	return 0;
560}
561
/* Counterpart of flow_get(); intentionally does nothing. */
static void flow_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
565
566static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
567		     struct sk_buff *skb, struct tcmsg *t)
568{
569	struct flow_filter *f = (struct flow_filter *)fh;
570	struct nlattr *nest;
571
572	if (f == NULL)
573		return skb->len;
574
575	t->tcm_handle = f->handle;
576
577	nest = nla_nest_start(skb, TCA_OPTIONS);
578	if (nest == NULL)
579		goto nla_put_failure;
580
581	if (nla_put_u32(skb, TCA_FLOW_KEYS, f->keymask) ||
582	    nla_put_u32(skb, TCA_FLOW_MODE, f->mode))
583		goto nla_put_failure;
584
585	if (f->mask != ~0 || f->xor != 0) {
586		if (nla_put_u32(skb, TCA_FLOW_MASK, f->mask) ||
587		    nla_put_u32(skb, TCA_FLOW_XOR, f->xor))
588			goto nla_put_failure;
589	}
590	if (f->rshift &&
591	    nla_put_u32(skb, TCA_FLOW_RSHIFT, f->rshift))
592		goto nla_put_failure;
593	if (f->addend &&
594	    nla_put_u32(skb, TCA_FLOW_ADDEND, f->addend))
595		goto nla_put_failure;
596
597	if (f->divisor &&
598	    nla_put_u32(skb, TCA_FLOW_DIVISOR, f->divisor))
599		goto nla_put_failure;
600	if (f->baseclass &&
601	    nla_put_u32(skb, TCA_FLOW_BASECLASS, f->baseclass))
602		goto nla_put_failure;
603
604	if (f->perturb_period &&
605	    nla_put_u32(skb, TCA_FLOW_PERTURB, f->perturb_period / HZ))
606		goto nla_put_failure;
607
608	if (tcf_exts_dump(skb, &f->exts) < 0)
609		goto nla_put_failure;
610#ifdef CONFIG_NET_EMATCH
611	if (f->ematches.hdr.nmatches &&
612	    tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0)
613		goto nla_put_failure;
614#endif
615	nla_nest_end(skb, nest);
616
617	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
618		goto nla_put_failure;
619
620	return skb->len;
621
622nla_put_failure:
623	nlmsg_trim(skb, nest);
624	return -1;
625}
626
627static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
628{
629	struct flow_head *head = tp->root;
630	struct flow_filter *f;
631
632	list_for_each_entry(f, &head->filters, list) {
633		if (arg->count < arg->skip)
634			goto skip;
635		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
636			arg->stop = 1;
637			break;
638		}
639skip:
640		arg->count++;
641	}
642}
643
644static struct tcf_proto_ops cls_flow_ops __read_mostly = {
645	.kind		= "flow",
646	.classify	= flow_classify,
647	.init		= flow_init,
648	.destroy	= flow_destroy,
649	.change		= flow_change,
650	.delete		= flow_delete,
651	.get		= flow_get,
652	.put		= flow_put,
653	.dump		= flow_dump,
654	.walk		= flow_walk,
655	.owner		= THIS_MODULE,
656};
657
658static int __init cls_flow_init(void)
659{
660	return register_tcf_proto_ops(&cls_flow_ops);
661}
662
663static void __exit cls_flow_exit(void)
664{
665	unregister_tcf_proto_ops(&cls_flow_ops);
666}
667
668module_init(cls_flow_init);
669module_exit(cls_flow_exit);
670
671MODULE_LICENSE("GPL");
672MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
673MODULE_DESCRIPTION("TC flow classifier");