Linux Audio

Check our new training course

Loading...
v4.6
  1/*
  2 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10 */
 11
 12/*
  13   Compared to the general packet classification problem,
  14   RSVP needs only several relatively simple rules:
 15
 16   * (dst, protocol) are always specified,
 17     so that we are able to hash them.
 18   * src may be exact, or may be wildcard, so that
 19     we can keep a hash table plus one wildcard entry.
 20   * source port (or flow label) is important only if src is given.
 21
 22   IMPLEMENTATION.
 23
 24   We use a two level hash table: The top level is keyed by
 25   destination address and protocol ID, every bucket contains a list
 26   of "rsvp sessions", identified by destination address, protocol and
 27   DPI(="Destination Port ID"): triple (key, mask, offset).
 28
 29   Every bucket has a smaller hash table keyed by source address
 30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
 31   Every bucket is again a list of "RSVP flows", selected by
 32   source address and SPI(="Source Port ID" here rather than
 33   "security parameter index"): triple (key, mask, offset).
 34
 35
 36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
 37   and all fragmented packets go to the best-effort traffic class.
 38
 39
 40   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
 41   only one "Generalized Port Identifier". So that for classic
 42   ah, esp (and udp,tcp) both *pi should coincide or one of them
 43   should be wildcard.
 44
 45   At first sight, this redundancy is just a waste of CPU
 46   resources. But DPI and SPI add the possibility to assign different
 47   priorities to GPIs. Look also at note 4 about tunnels below.
 48
 49
 50   NOTE 3. One complication is the case of tunneled packets.
 51   We implement it as following: if the first lookup
 52   matches a special session with "tunnelhdr" value not zero,
 53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
 54   In this case, we pull tunnelhdr bytes and restart lookup
 55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
 56   It's enough for PIMREG and IPIP.
 57
 58
 59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
 60   F.e. DPI can select ETH_P_IP (and necessary flags to make
 61   tunnelhdr correct) in GRE protocol field and SPI matches
 62   GRE key. Is it not nice? 8)8)
 63
 64
 65   Well, as result, despite its simplicity, we get a pretty
 66   powerful classification engine.  */
 67
 68
 69struct rsvp_head {
 70	u32			tmap[256/32];
 71	u32			hgenerator;
 72	u8			tgenerator;
 73	struct rsvp_session __rcu *ht[256];
 74	struct rcu_head		rcu;
 75};
 76
 77struct rsvp_session {
 78	struct rsvp_session __rcu	*next;
 79	__be32				dst[RSVP_DST_LEN];
 80	struct tc_rsvp_gpi		dpi;
 81	u8				protocol;
 82	u8				tunnelid;
 83	/* 16 (src,sport) hash slots, and one wildcard source slot */
 84	struct rsvp_filter __rcu	*ht[16 + 1];
 85	struct rcu_head			rcu;
 86};
 87
 88
 89struct rsvp_filter {
 90	struct rsvp_filter __rcu	*next;
 91	__be32				src[RSVP_DST_LEN];
 92	struct tc_rsvp_gpi		spi;
 93	u8				tunnelhdr;
 94
 95	struct tcf_result		res;
 96	struct tcf_exts			exts;
 97
 98	u32				handle;
 99	struct rsvp_session		*sess;
100	struct rcu_head			rcu;
101};
102
103static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{
105	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
106
107	h ^= h>>16;
108	h ^= h>>8;
109	return (h ^ protocol ^ tunnelid) & 0xFF;
110}
111
112static inline unsigned int hash_src(__be32 *src)
113{
114	unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
115
116	h ^= h>>16;
117	h ^= h>>8;
118	h ^= h>>4;
119	return h & 0xF;
120}
121
 
 
 
 
 
/*
 * Run the filter's extensions on the packet.  Expects "skb", "f" and "res"
 * to be in scope at the point of use.
 *
 *   result > 0: returned straight from the enclosing function
 *   result < 0: move on to the next filter of the enclosing loop
 *   result == 0: fall through
 *
 * This has to remain a bare brace block rather than do { } while (0):
 * the "continue" must act on the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
								\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
130
131static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132			 struct tcf_result *res)
133{
134	struct rsvp_head *head = rcu_dereference_bh(tp->root);
135	struct rsvp_session *s;
136	struct rsvp_filter *f;
137	unsigned int h1, h2;
138	__be32 *dst, *src;
139	u8 protocol;
140	u8 tunnelid = 0;
141	u8 *xprt;
142#if RSVP_DST_LEN == 4
143	struct ipv6hdr *nhptr;
144
145	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
146		return -1;
147	nhptr = ipv6_hdr(skb);
148#else
149	struct iphdr *nhptr;
150
151	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
152		return -1;
153	nhptr = ip_hdr(skb);
154#endif
155
156restart:
157
158#if RSVP_DST_LEN == 4
159	src = &nhptr->saddr.s6_addr32[0];
160	dst = &nhptr->daddr.s6_addr32[0];
161	protocol = nhptr->nexthdr;
162	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
163#else
164	src = &nhptr->saddr;
165	dst = &nhptr->daddr;
166	protocol = nhptr->protocol;
167	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
168	if (ip_is_fragment(nhptr))
169		return -1;
170#endif
171
172	h1 = hash_dst(dst, protocol, tunnelid);
173	h2 = hash_src(src);
174
175	for (s = rcu_dereference_bh(head->ht[h1]); s;
176	     s = rcu_dereference_bh(s->next)) {
177		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
178		    protocol == s->protocol &&
179		    !(s->dpi.mask &
180		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
181#if RSVP_DST_LEN == 4
182		    dst[0] == s->dst[0] &&
183		    dst[1] == s->dst[1] &&
184		    dst[2] == s->dst[2] &&
185#endif
186		    tunnelid == s->tunnelid) {
187
188			for (f = rcu_dereference_bh(s->ht[h2]); f;
189			     f = rcu_dereference_bh(f->next)) {
190				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
191				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
192#if RSVP_DST_LEN == 4
193				    &&
194				    src[0] == f->src[0] &&
195				    src[1] == f->src[1] &&
196				    src[2] == f->src[2]
197#endif
198				    ) {
199					*res = f->res;
200					RSVP_APPLY_RESULT();
201
202matched:
203					if (f->tunnelhdr == 0)
204						return 0;
205
206					tunnelid = f->res.classid;
207					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
208					goto restart;
209				}
210			}
211
212			/* And wildcard bucket... */
213			for (f = rcu_dereference_bh(s->ht[16]); f;
214			     f = rcu_dereference_bh(f->next)) {
215				*res = f->res;
216				RSVP_APPLY_RESULT();
217				goto matched;
218			}
219			return -1;
220		}
221	}
222	return -1;
223}
224
225static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
226{
227	struct rsvp_head *head = rtnl_dereference(tp->root);
228	struct rsvp_session *s;
229	struct rsvp_filter __rcu **ins;
230	struct rsvp_filter *pins;
231	unsigned int h1 = h & 0xFF;
232	unsigned int h2 = (h >> 8) & 0xFF;
233
234	for (s = rtnl_dereference(head->ht[h1]); s;
235	     s = rtnl_dereference(s->next)) {
236		for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
237		     ins = &pins->next, pins = rtnl_dereference(*ins)) {
238			if (pins->handle == h) {
239				RCU_INIT_POINTER(n->next, pins->next);
240				rcu_assign_pointer(*ins, n);
241				return;
242			}
243		}
244	}
245
246	/* Something went wrong if we are trying to replace a non-existant
247	 * node. Mind as well halt instead of silently failing.
248	 */
249	BUG_ON(1);
250}
251
252static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
253{
254	struct rsvp_head *head = rtnl_dereference(tp->root);
255	struct rsvp_session *s;
256	struct rsvp_filter *f;
257	unsigned int h1 = handle & 0xFF;
258	unsigned int h2 = (handle >> 8) & 0xFF;
259
260	if (h2 > 16)
261		return 0;
262
263	for (s = rtnl_dereference(head->ht[h1]); s;
264	     s = rtnl_dereference(s->next)) {
265		for (f = rtnl_dereference(s->ht[h2]); f;
266		     f = rtnl_dereference(f->next)) {
267			if (f->handle == handle)
268				return (unsigned long)f;
269		}
270	}
271	return 0;
272}
273
 
 
 
 
274static int rsvp_init(struct tcf_proto *tp)
275{
276	struct rsvp_head *data;
277
278	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
279	if (data) {
280		rcu_assign_pointer(tp->root, data);
281		return 0;
282	}
283	return -ENOBUFS;
284}
285
286static void rsvp_delete_filter_rcu(struct rcu_head *head)
287{
288	struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
289
290	tcf_exts_destroy(&f->exts);
291	kfree(f);
292}
293
294static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
295{
296	tcf_unbind_filter(tp, &f->res);
297	/* all classifiers are required to call tcf_exts_destroy() after rcu
298	 * grace period, since converted-to-rcu actions are relying on that
299	 * in cleanup() callback
300	 */
301	call_rcu(&f->rcu, rsvp_delete_filter_rcu);
302}
303
304static bool rsvp_destroy(struct tcf_proto *tp, bool force)
305{
306	struct rsvp_head *data = rtnl_dereference(tp->root);
 
307	int h1, h2;
308
309	if (data == NULL)
310		return true;
311
312	if (!force) {
313		for (h1 = 0; h1 < 256; h1++) {
314			if (rcu_access_pointer(data->ht[h1]))
315				return false;
316		}
317	}
318
319	RCU_INIT_POINTER(tp->root, NULL);
320
321	for (h1 = 0; h1 < 256; h1++) {
322		struct rsvp_session *s;
323
324		while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
325			RCU_INIT_POINTER(data->ht[h1], s->next);
326
327			for (h2 = 0; h2 <= 16; h2++) {
328				struct rsvp_filter *f;
329
330				while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
331					rcu_assign_pointer(s->ht[h2], f->next);
332					rsvp_delete_filter(tp, f);
333				}
334			}
335			kfree_rcu(s, rcu);
336		}
337	}
338	kfree_rcu(data, rcu);
339	return true;
340}
341
342static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
343{
344	struct rsvp_head *head = rtnl_dereference(tp->root);
345	struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
346	struct rsvp_filter __rcu **fp;
347	unsigned int h = f->handle;
348	struct rsvp_session __rcu **sp;
349	struct rsvp_session *nsp, *s = f->sess;
350	int i;
351
352	fp = &s->ht[(h >> 8) & 0xFF];
353	for (nfp = rtnl_dereference(*fp); nfp;
354	     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
355		if (nfp == f) {
356			RCU_INIT_POINTER(*fp, f->next);
357			rsvp_delete_filter(tp, f);
358
359			/* Strip tree */
360
361			for (i = 0; i <= 16; i++)
362				if (s->ht[i])
363					return 0;
364
365			/* OK, session has no flows */
366			sp = &head->ht[h & 0xFF];
367			for (nsp = rtnl_dereference(*sp); nsp;
368			     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
369				if (nsp == s) {
370					RCU_INIT_POINTER(*sp, s->next);
371					kfree_rcu(s, rcu);
 
 
372					return 0;
373				}
374			}
375
376			return 0;
377		}
378	}
379	return 0;
380}
381
382static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
383{
384	struct rsvp_head *data = rtnl_dereference(tp->root);
385	int i = 0xFFFF;
386
387	while (i-- > 0) {
388		u32 h;
389
390		if ((data->hgenerator += 0x10000) == 0)
391			data->hgenerator = 0x10000;
392		h = data->hgenerator|salt;
393		if (rsvp_get(tp, h) == 0)
394			return h;
395	}
396	return 0;
397}
398
399static int tunnel_bts(struct rsvp_head *data)
400{
401	int n = data->tgenerator >> 5;
402	u32 b = 1 << (data->tgenerator & 0x1F);
403
404	if (data->tmap[n] & b)
405		return 0;
406	data->tmap[n] |= b;
407	return 1;
408}
409
410static void tunnel_recycle(struct rsvp_head *data)
411{
412	struct rsvp_session __rcu **sht = data->ht;
413	u32 tmap[256/32];
414	int h1, h2;
415
416	memset(tmap, 0, sizeof(tmap));
417
418	for (h1 = 0; h1 < 256; h1++) {
419		struct rsvp_session *s;
420		for (s = rtnl_dereference(sht[h1]); s;
421		     s = rtnl_dereference(s->next)) {
422			for (h2 = 0; h2 <= 16; h2++) {
423				struct rsvp_filter *f;
424
425				for (f = rtnl_dereference(s->ht[h2]); f;
426				     f = rtnl_dereference(f->next)) {
427					if (f->tunnelhdr == 0)
428						continue;
429					data->tgenerator = f->res.classid;
430					tunnel_bts(data);
431				}
432			}
433		}
434	}
435
436	memcpy(data->tmap, tmap, sizeof(tmap));
437}
438
439static u32 gen_tunnel(struct rsvp_head *data)
440{
441	int i, k;
442
443	for (k = 0; k < 2; k++) {
444		for (i = 255; i > 0; i--) {
445			if (++data->tgenerator == 0)
446				data->tgenerator = 1;
447			if (tunnel_bts(data))
448				return data->tgenerator;
449		}
450		tunnel_recycle(data);
451	}
452	return 0;
453}
454
455static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
456	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
457	[TCA_RSVP_DST]		= { .type = NLA_BINARY,
458				    .len = RSVP_DST_LEN * sizeof(u32) },
459	[TCA_RSVP_SRC]		= { .type = NLA_BINARY,
460				    .len = RSVP_DST_LEN * sizeof(u32) },
461	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
462};
463
464static int rsvp_change(struct net *net, struct sk_buff *in_skb,
465		       struct tcf_proto *tp, unsigned long base,
466		       u32 handle,
467		       struct nlattr **tca,
468		       unsigned long *arg, bool ovr)
469{
470	struct rsvp_head *data = rtnl_dereference(tp->root);
471	struct rsvp_filter *f, *nfp;
472	struct rsvp_filter __rcu **fp;
473	struct rsvp_session *nsp, *s;
474	struct rsvp_session __rcu **sp;
475	struct tc_rsvp_pinfo *pinfo = NULL;
476	struct nlattr *opt = tca[TCA_OPTIONS];
477	struct nlattr *tb[TCA_RSVP_MAX + 1];
478	struct tcf_exts e;
479	unsigned int h1, h2;
480	__be32 *dst;
481	int err;
482
483	if (opt == NULL)
484		return handle ? -EINVAL : 0;
485
486	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
487	if (err < 0)
488		return err;
489
490	tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
491	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
492	if (err < 0)
493		return err;
494
495	f = (struct rsvp_filter *)*arg;
496	if (f) {
497		/* Node exists: adjust only classid */
498		struct rsvp_filter *n;
499
500		if (f->handle != handle && handle)
501			goto errout2;
502
503		n = kmemdup(f, sizeof(*f), GFP_KERNEL);
504		if (!n) {
505			err = -ENOMEM;
506			goto errout2;
507		}
508
509		tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
510
511		if (tb[TCA_RSVP_CLASSID]) {
512			n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
513			tcf_bind_filter(tp, &n->res, base);
514		}
515
516		tcf_exts_change(tp, &n->exts, &e);
517		rsvp_replace(tp, n, handle);
518		return 0;
519	}
520
521	/* Now more serious part... */
522	err = -EINVAL;
523	if (handle)
524		goto errout2;
525	if (tb[TCA_RSVP_DST] == NULL)
526		goto errout2;
527
528	err = -ENOBUFS;
529	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
530	if (f == NULL)
531		goto errout2;
532
533	tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
534	h2 = 16;
535	if (tb[TCA_RSVP_SRC]) {
536		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
537		h2 = hash_src(f->src);
538	}
539	if (tb[TCA_RSVP_PINFO]) {
540		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
541		f->spi = pinfo->spi;
542		f->tunnelhdr = pinfo->tunnelhdr;
543	}
544	if (tb[TCA_RSVP_CLASSID])
545		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
546
547	dst = nla_data(tb[TCA_RSVP_DST]);
548	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
549
550	err = -ENOMEM;
551	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
552		goto errout;
553
554	if (f->tunnelhdr) {
555		err = -EINVAL;
556		if (f->res.classid > 255)
557			goto errout;
558
559		err = -ENOMEM;
560		if (f->res.classid == 0 &&
561		    (f->res.classid = gen_tunnel(data)) == 0)
562			goto errout;
563	}
564
565	for (sp = &data->ht[h1];
566	     (s = rtnl_dereference(*sp)) != NULL;
567	     sp = &s->next) {
568		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
569		    pinfo && pinfo->protocol == s->protocol &&
570		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
571#if RSVP_DST_LEN == 4
572		    dst[0] == s->dst[0] &&
573		    dst[1] == s->dst[1] &&
574		    dst[2] == s->dst[2] &&
575#endif
576		    pinfo->tunnelid == s->tunnelid) {
577
578insert:
579			/* OK, we found appropriate session */
580
581			fp = &s->ht[h2];
582
583			f->sess = s;
584			if (f->tunnelhdr == 0)
585				tcf_bind_filter(tp, &f->res, base);
586
587			tcf_exts_change(tp, &f->exts, &e);
588
589			fp = &s->ht[h2];
590			for (nfp = rtnl_dereference(*fp); nfp;
591			     fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
592				__u32 mask = nfp->spi.mask & f->spi.mask;
593
594				if (mask != f->spi.mask)
595					break;
596			}
597			RCU_INIT_POINTER(f->next, nfp);
598			rcu_assign_pointer(*fp, f);
599
600			*arg = (unsigned long)f;
601			return 0;
602		}
603	}
604
605	/* No session found. Create new one. */
606
607	err = -ENOBUFS;
608	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
609	if (s == NULL)
610		goto errout;
611	memcpy(s->dst, dst, sizeof(s->dst));
612
613	if (pinfo) {
614		s->dpi = pinfo->dpi;
615		s->protocol = pinfo->protocol;
616		s->tunnelid = pinfo->tunnelid;
617	}
618	sp = &data->ht[h1];
619	for (nsp = rtnl_dereference(*sp); nsp;
620	     sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
621		if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
622			break;
623	}
624	RCU_INIT_POINTER(s->next, nsp);
625	rcu_assign_pointer(*sp, s);
 
626
627	goto insert;
628
629errout:
630	kfree(f);
631errout2:
632	tcf_exts_destroy(&e);
633	return err;
634}
635
636static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
637{
638	struct rsvp_head *head = rtnl_dereference(tp->root);
639	unsigned int h, h1;
640
641	if (arg->stop)
642		return;
643
644	for (h = 0; h < 256; h++) {
645		struct rsvp_session *s;
646
647		for (s = rtnl_dereference(head->ht[h]); s;
648		     s = rtnl_dereference(s->next)) {
649			for (h1 = 0; h1 <= 16; h1++) {
650				struct rsvp_filter *f;
651
652				for (f = rtnl_dereference(s->ht[h1]); f;
653				     f = rtnl_dereference(f->next)) {
654					if (arg->count < arg->skip) {
655						arg->count++;
656						continue;
657					}
658					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
659						arg->stop = 1;
660						return;
661					}
662					arg->count++;
663				}
664			}
665		}
666	}
667}
668
669static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
670		     struct sk_buff *skb, struct tcmsg *t)
671{
672	struct rsvp_filter *f = (struct rsvp_filter *)fh;
673	struct rsvp_session *s;
 
674	struct nlattr *nest;
675	struct tc_rsvp_pinfo pinfo;
676
677	if (f == NULL)
678		return skb->len;
679	s = f->sess;
680
681	t->tcm_handle = f->handle;
682
683	nest = nla_nest_start(skb, TCA_OPTIONS);
684	if (nest == NULL)
685		goto nla_put_failure;
686
687	if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
688		goto nla_put_failure;
689	pinfo.dpi = s->dpi;
690	pinfo.spi = f->spi;
691	pinfo.protocol = s->protocol;
692	pinfo.tunnelid = s->tunnelid;
693	pinfo.tunnelhdr = f->tunnelhdr;
694	pinfo.pad = 0;
695	if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
696		goto nla_put_failure;
697	if (f->res.classid &&
698	    nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
699		goto nla_put_failure;
700	if (((f->handle >> 8) & 0xFF) != 16 &&
701	    nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
702		goto nla_put_failure;
703
704	if (tcf_exts_dump(skb, &f->exts) < 0)
705		goto nla_put_failure;
706
707	nla_nest_end(skb, nest);
708
709	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
710		goto nla_put_failure;
711	return skb->len;
712
713nla_put_failure:
714	nla_nest_cancel(skb, nest);
715	return -1;
716}
717
718static struct tcf_proto_ops RSVP_OPS __read_mostly = {
719	.kind		=	RSVP_ID,
720	.classify	=	rsvp_classify,
721	.init		=	rsvp_init,
722	.destroy	=	rsvp_destroy,
723	.get		=	rsvp_get,
 
724	.change		=	rsvp_change,
725	.delete		=	rsvp_delete,
726	.walk		=	rsvp_walk,
727	.dump		=	rsvp_dump,
728	.owner		=	THIS_MODULE,
729};
730
731static int __init init_rsvp(void)
732{
733	return register_tcf_proto_ops(&RSVP_OPS);
734}
735
736static void __exit exit_rsvp(void)
737{
738	unregister_tcf_proto_ops(&RSVP_OPS);
739}
740
741module_init(init_rsvp)
742module_exit(exit_rsvp)
v3.1
  1/*
  2 * net/sched/cls_rsvp.h	Template file for RSVPv[46] classifiers.
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10 */
 11
 12/*
  13   Compared to the general packet classification problem,
  14   RSVP needs only several relatively simple rules:
 15
 16   * (dst, protocol) are always specified,
 17     so that we are able to hash them.
 18   * src may be exact, or may be wildcard, so that
 19     we can keep a hash table plus one wildcard entry.
 20   * source port (or flow label) is important only if src is given.
 21
 22   IMPLEMENTATION.
 23
 24   We use a two level hash table: The top level is keyed by
 25   destination address and protocol ID, every bucket contains a list
 26   of "rsvp sessions", identified by destination address, protocol and
 27   DPI(="Destination Port ID"): triple (key, mask, offset).
 28
 29   Every bucket has a smaller hash table keyed by source address
 30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
 31   Every bucket is again a list of "RSVP flows", selected by
 32   source address and SPI(="Source Port ID" here rather than
 33   "security parameter index"): triple (key, mask, offset).
 34
 35
 36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
 37   and all fragmented packets go to the best-effort traffic class.
 38
 39
 40   NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
 41   only one "Generalized Port Identifier". So that for classic
 42   ah, esp (and udp,tcp) both *pi should coincide or one of them
 43   should be wildcard.
 44
 45   At first sight, this redundancy is just a waste of CPU
 46   resources. But DPI and SPI add the possibility to assign different
 47   priorities to GPIs. Look also at note 4 about tunnels below.
 48
 49
 50   NOTE 3. One complication is the case of tunneled packets.
 51   We implement it as following: if the first lookup
 52   matches a special session with "tunnelhdr" value not zero,
 53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
 54   In this case, we pull tunnelhdr bytes and restart lookup
 55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
 56   It's enough for PIMREG and IPIP.
 57
 58
 59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
 60   F.e. DPI can select ETH_P_IP (and necessary flags to make
 61   tunnelhdr correct) in GRE protocol field and SPI matches
 62   GRE key. Is it not nice? 8)8)
 63
 64
 65   Well, as result, despite its simplicity, we get a pretty
 66   powerful classification engine.  */
 67
 68
 69struct rsvp_head {
 70	u32			tmap[256/32];
 71	u32			hgenerator;
 72	u8			tgenerator;
 73	struct rsvp_session	*ht[256];
 
 74};
 75
 76struct rsvp_session {
 77	struct rsvp_session	*next;
 78	__be32			dst[RSVP_DST_LEN];
 79	struct tc_rsvp_gpi 	dpi;
 80	u8			protocol;
 81	u8			tunnelid;
 82	/* 16 (src,sport) hash slots, and one wildcard source slot */
 83	struct rsvp_filter	*ht[16 + 1];
 
 84};
 85
 86
 87struct rsvp_filter {
 88	struct rsvp_filter	*next;
 89	__be32			src[RSVP_DST_LEN];
 90	struct tc_rsvp_gpi	spi;
 91	u8			tunnelhdr;
 92
 93	struct tcf_result	res;
 94	struct tcf_exts		exts;
 95
 96	u32			handle;
 97	struct rsvp_session	*sess;
 
 98};
 99
100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
101{
102	unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
104	h ^= h>>16;
105	h ^= h>>8;
106	return (h ^ protocol ^ tunnelid) & 0xFF;
107}
108
109static inline unsigned int hash_src(__be32 *src)
110{
111	unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
113	h ^= h>>16;
114	h ^= h>>8;
115	h ^= h>>4;
116	return h & 0xF;
117}
118
119static struct tcf_ext_map rsvp_ext_map = {
120	.police = TCA_RSVP_POLICE,
121	.action = TCA_RSVP_ACT
122};
123
/*
 * Run the filter's extensions on the packet.  Expects "skb", "f" and "res"
 * to be in scope at the point of use.
 *
 *   result > 0: returned straight from the enclosing function
 *   result < 0: move on to the next filter of the enclosing loop
 *   result == 0: fall through
 *
 * This has to remain a bare brace block rather than do { } while (0):
 * the "continue" must act on the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
								\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
132
133static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
134			 struct tcf_result *res)
135{
136	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
137	struct rsvp_session *s;
138	struct rsvp_filter *f;
139	unsigned int h1, h2;
140	__be32 *dst, *src;
141	u8 protocol;
142	u8 tunnelid = 0;
143	u8 *xprt;
144#if RSVP_DST_LEN == 4
145	struct ipv6hdr *nhptr;
146
147	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
148		return -1;
149	nhptr = ipv6_hdr(skb);
150#else
151	struct iphdr *nhptr;
152
153	if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
154		return -1;
155	nhptr = ip_hdr(skb);
156#endif
157
158restart:
159
160#if RSVP_DST_LEN == 4
161	src = &nhptr->saddr.s6_addr32[0];
162	dst = &nhptr->daddr.s6_addr32[0];
163	protocol = nhptr->nexthdr;
164	xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165#else
166	src = &nhptr->saddr;
167	dst = &nhptr->daddr;
168	protocol = nhptr->protocol;
169	xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170	if (ip_is_fragment(nhptr))
171		return -1;
172#endif
173
174	h1 = hash_dst(dst, protocol, tunnelid);
175	h2 = hash_src(src);
176
177	for (s = sht[h1]; s; s = s->next) {
 
178		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179		    protocol == s->protocol &&
180		    !(s->dpi.mask &
181		      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182#if RSVP_DST_LEN == 4
183		    dst[0] == s->dst[0] &&
184		    dst[1] == s->dst[1] &&
185		    dst[2] == s->dst[2] &&
186#endif
187		    tunnelid == s->tunnelid) {
188
189			for (f = s->ht[h2]; f; f = f->next) {
 
190				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
191				    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
192#if RSVP_DST_LEN == 4
193				    &&
194				    src[0] == f->src[0] &&
195				    src[1] == f->src[1] &&
196				    src[2] == f->src[2]
197#endif
198				    ) {
199					*res = f->res;
200					RSVP_APPLY_RESULT();
201
202matched:
203					if (f->tunnelhdr == 0)
204						return 0;
205
206					tunnelid = f->res.classid;
207					nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
208					goto restart;
209				}
210			}
211
212			/* And wildcard bucket... */
213			for (f = s->ht[16]; f; f = f->next) {
 
214				*res = f->res;
215				RSVP_APPLY_RESULT();
216				goto matched;
217			}
218			return -1;
219		}
220	}
221	return -1;
222}
223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
225{
226	struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
227	struct rsvp_session *s;
228	struct rsvp_filter *f;
229	unsigned int h1 = handle & 0xFF;
230	unsigned int h2 = (handle >> 8) & 0xFF;
231
232	if (h2 > 16)
233		return 0;
234
235	for (s = sht[h1]; s; s = s->next) {
236		for (f = s->ht[h2]; f; f = f->next) {
 
 
237			if (f->handle == handle)
238				return (unsigned long)f;
239		}
240	}
241	return 0;
242}
243
/* Deliberately empty: nothing is done with @tp or @f here. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* no-op */
}
247
248static int rsvp_init(struct tcf_proto *tp)
249{
250	struct rsvp_head *data;
251
252	data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
253	if (data) {
254		tp->root = data;
255		return 0;
256	}
257	return -ENOBUFS;
258}
259
260static void
261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 
 
 
 
 
 
 
262{
263	tcf_unbind_filter(tp, &f->res);
264	tcf_exts_destroy(tp, &f->exts);
265	kfree(f);
 
 
 
266}
267
268static void rsvp_destroy(struct tcf_proto *tp)
269{
270	struct rsvp_head *data = xchg(&tp->root, NULL);
271	struct rsvp_session **sht;
272	int h1, h2;
273
274	if (data == NULL)
275		return;
 
 
 
 
 
 
 
276
277	sht = data->ht;
278
279	for (h1 = 0; h1 < 256; h1++) {
280		struct rsvp_session *s;
281
282		while ((s = sht[h1]) != NULL) {
283			sht[h1] = s->next;
284
285			for (h2 = 0; h2 <= 16; h2++) {
286				struct rsvp_filter *f;
287
288				while ((f = s->ht[h2]) != NULL) {
289					s->ht[h2] = f->next;
290					rsvp_delete_filter(tp, f);
291				}
292			}
293			kfree(s);
294		}
295	}
296	kfree(data);
 
297}
298
299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
300{
301	struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
 
 
302	unsigned int h = f->handle;
303	struct rsvp_session **sp;
304	struct rsvp_session *s = f->sess;
305	int i;
306
307	for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
308		if (*fp == f) {
309			tcf_tree_lock(tp);
310			*fp = f->next;
311			tcf_tree_unlock(tp);
312			rsvp_delete_filter(tp, f);
313
314			/* Strip tree */
315
316			for (i = 0; i <= 16; i++)
317				if (s->ht[i])
318					return 0;
319
320			/* OK, session has no flows */
321			for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
322			     *sp; sp = &(*sp)->next) {
323				if (*sp == s) {
324					tcf_tree_lock(tp);
325					*sp = s->next;
326					tcf_tree_unlock(tp);
327
328					kfree(s);
329					return 0;
330				}
331			}
332
333			return 0;
334		}
335	}
336	return 0;
337}
338
339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
340{
341	struct rsvp_head *data = tp->root;
342	int i = 0xFFFF;
343
344	while (i-- > 0) {
345		u32 h;
346
347		if ((data->hgenerator += 0x10000) == 0)
348			data->hgenerator = 0x10000;
349		h = data->hgenerator|salt;
350		if (rsvp_get(tp, h) == 0)
351			return h;
352	}
353	return 0;
354}
355
356static int tunnel_bts(struct rsvp_head *data)
357{
358	int n = data->tgenerator >> 5;
359	u32 b = 1 << (data->tgenerator & 0x1F);
360
361	if (data->tmap[n] & b)
362		return 0;
363	data->tmap[n] |= b;
364	return 1;
365}
366
367static void tunnel_recycle(struct rsvp_head *data)
368{
369	struct rsvp_session **sht = data->ht;
370	u32 tmap[256/32];
371	int h1, h2;
372
373	memset(tmap, 0, sizeof(tmap));
374
375	for (h1 = 0; h1 < 256; h1++) {
376		struct rsvp_session *s;
377		for (s = sht[h1]; s; s = s->next) {
 
378			for (h2 = 0; h2 <= 16; h2++) {
379				struct rsvp_filter *f;
380
381				for (f = s->ht[h2]; f; f = f->next) {
 
382					if (f->tunnelhdr == 0)
383						continue;
384					data->tgenerator = f->res.classid;
385					tunnel_bts(data);
386				}
387			}
388		}
389	}
390
391	memcpy(data->tmap, tmap, sizeof(tmap));
392}
393
394static u32 gen_tunnel(struct rsvp_head *data)
395{
396	int i, k;
397
398	for (k = 0; k < 2; k++) {
399		for (i = 255; i > 0; i--) {
400			if (++data->tgenerator == 0)
401				data->tgenerator = 1;
402			if (tunnel_bts(data))
403				return data->tgenerator;
404		}
405		tunnel_recycle(data);
406	}
407	return 0;
408}
409
410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
412	[TCA_RSVP_DST]		= { .type = NLA_BINARY,
413				    .len = RSVP_DST_LEN * sizeof(u32) },
414	[TCA_RSVP_SRC]		= { .type = NLA_BINARY,
415				    .len = RSVP_DST_LEN * sizeof(u32) },
416	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
417};
418
419static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 
420		       u32 handle,
421		       struct nlattr **tca,
422		       unsigned long *arg)
423{
424	struct rsvp_head *data = tp->root;
425	struct rsvp_filter *f, **fp;
426	struct rsvp_session *s, **sp;
 
 
427	struct tc_rsvp_pinfo *pinfo = NULL;
428	struct nlattr *opt = tca[TCA_OPTIONS];
429	struct nlattr *tb[TCA_RSVP_MAX + 1];
430	struct tcf_exts e;
431	unsigned int h1, h2;
432	__be32 *dst;
433	int err;
434
435	if (opt == NULL)
436		return handle ? -EINVAL : 0;
437
438	err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
439	if (err < 0)
440		return err;
441
442	err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 
443	if (err < 0)
444		return err;
445
446	f = (struct rsvp_filter *)*arg;
447	if (f) {
448		/* Node exists: adjust only classid */
 
449
450		if (f->handle != handle && handle)
451			goto errout2;
 
 
 
 
 
 
 
 
 
452		if (tb[TCA_RSVP_CLASSID]) {
453			f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
454			tcf_bind_filter(tp, &f->res, base);
455		}
456
457		tcf_exts_change(tp, &f->exts, &e);
 
458		return 0;
459	}
460
461	/* Now more serious part... */
462	err = -EINVAL;
463	if (handle)
464		goto errout2;
465	if (tb[TCA_RSVP_DST] == NULL)
466		goto errout2;
467
468	err = -ENOBUFS;
469	f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
470	if (f == NULL)
471		goto errout2;
472
 
473	h2 = 16;
474	if (tb[TCA_RSVP_SRC]) {
475		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
476		h2 = hash_src(f->src);
477	}
478	if (tb[TCA_RSVP_PINFO]) {
479		pinfo = nla_data(tb[TCA_RSVP_PINFO]);
480		f->spi = pinfo->spi;
481		f->tunnelhdr = pinfo->tunnelhdr;
482	}
483	if (tb[TCA_RSVP_CLASSID])
484		f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
485
486	dst = nla_data(tb[TCA_RSVP_DST]);
487	h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
488
489	err = -ENOMEM;
490	if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
491		goto errout;
492
493	if (f->tunnelhdr) {
494		err = -EINVAL;
495		if (f->res.classid > 255)
496			goto errout;
497
498		err = -ENOMEM;
499		if (f->res.classid == 0 &&
500		    (f->res.classid = gen_tunnel(data)) == 0)
501			goto errout;
502	}
503
504	for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
 
 
505		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
506		    pinfo && pinfo->protocol == s->protocol &&
507		    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
508#if RSVP_DST_LEN == 4
509		    dst[0] == s->dst[0] &&
510		    dst[1] == s->dst[1] &&
511		    dst[2] == s->dst[2] &&
512#endif
513		    pinfo->tunnelid == s->tunnelid) {
514
515insert:
516			/* OK, we found appropriate session */
517
518			fp = &s->ht[h2];
519
520			f->sess = s;
521			if (f->tunnelhdr == 0)
522				tcf_bind_filter(tp, &f->res, base);
523
524			tcf_exts_change(tp, &f->exts, &e);
525
526			for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
527				if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
 
 
 
 
528					break;
529			f->next = *fp;
530			wmb();
531			*fp = f;
532
533			*arg = (unsigned long)f;
534			return 0;
535		}
536	}
537
538	/* No session found. Create new one. */
539
540	err = -ENOBUFS;
541	s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
542	if (s == NULL)
543		goto errout;
544	memcpy(s->dst, dst, sizeof(s->dst));
545
546	if (pinfo) {
547		s->dpi = pinfo->dpi;
548		s->protocol = pinfo->protocol;
549		s->tunnelid = pinfo->tunnelid;
550	}
551	for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
552		if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 
 
553			break;
554	}
555	s->next = *sp;
556	wmb();
557	*sp = s;
558
559	goto insert;
560
561errout:
562	kfree(f);
563errout2:
564	tcf_exts_destroy(tp, &e);
565	return err;
566}
567
568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
569{
570	struct rsvp_head *head = tp->root;
571	unsigned int h, h1;
572
573	if (arg->stop)
574		return;
575
576	for (h = 0; h < 256; h++) {
577		struct rsvp_session *s;
578
579		for (s = head->ht[h]; s; s = s->next) {
 
580			for (h1 = 0; h1 <= 16; h1++) {
581				struct rsvp_filter *f;
582
583				for (f = s->ht[h1]; f; f = f->next) {
 
584					if (arg->count < arg->skip) {
585						arg->count++;
586						continue;
587					}
588					if (arg->fn(tp, (unsigned long)f, arg) < 0) {
589						arg->stop = 1;
590						return;
591					}
592					arg->count++;
593				}
594			}
595		}
596	}
597}
598
599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
600		     struct sk_buff *skb, struct tcmsg *t)
601{
602	struct rsvp_filter *f = (struct rsvp_filter *)fh;
603	struct rsvp_session *s;
604	unsigned char *b = skb_tail_pointer(skb);
605	struct nlattr *nest;
606	struct tc_rsvp_pinfo pinfo;
607
608	if (f == NULL)
609		return skb->len;
610	s = f->sess;
611
612	t->tcm_handle = f->handle;
613
614	nest = nla_nest_start(skb, TCA_OPTIONS);
615	if (nest == NULL)
616		goto nla_put_failure;
617
618	NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 
619	pinfo.dpi = s->dpi;
620	pinfo.spi = f->spi;
621	pinfo.protocol = s->protocol;
622	pinfo.tunnelid = s->tunnelid;
623	pinfo.tunnelhdr = f->tunnelhdr;
624	pinfo.pad = 0;
625	NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
626	if (f->res.classid)
627		NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
628	if (((f->handle >> 8) & 0xFF) != 16)
629		NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 
 
 
630
631	if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
632		goto nla_put_failure;
633
634	nla_nest_end(skb, nest);
635
636	if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
637		goto nla_put_failure;
638	return skb->len;
639
640nla_put_failure:
641	nlmsg_trim(skb, b);
642	return -1;
643}
644
645static struct tcf_proto_ops RSVP_OPS __read_mostly = {
646	.kind		=	RSVP_ID,
647	.classify	=	rsvp_classify,
648	.init		=	rsvp_init,
649	.destroy	=	rsvp_destroy,
650	.get		=	rsvp_get,
651	.put		=	rsvp_put,
652	.change		=	rsvp_change,
653	.delete		=	rsvp_delete,
654	.walk		=	rsvp_walk,
655	.dump		=	rsvp_dump,
656	.owner		=	THIS_MODULE,
657};
658
659static int __init init_rsvp(void)
660{
661	return register_tcf_proto_ops(&RSVP_OPS);
662}
663
664static void __exit exit_rsvp(void)
665{
666	unregister_tcf_proto_ops(&RSVP_OPS);
667}
668
/* Run init_rsvp() on module load and exit_rsvp() on module unload. */
module_init(init_rsvp);
module_exit(exit_rsvp);