Loading...
1/*
2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12/*
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
15
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
21
22 IMPLEMENTATION.
23
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
28
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
34
35
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
38
39
40 NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
44
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
57
58
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
63
64
65 Well, as result, despite its simplicity, we get a pretty
66 powerful classification engine. */
67
68
69struct rsvp_head {
70 u32 tmap[256/32];
71 u32 hgenerator;
72 u8 tgenerator;
73 struct rsvp_session *ht[256];
74};
75
76struct rsvp_session {
77 struct rsvp_session *next;
78 __be32 dst[RSVP_DST_LEN];
79 struct tc_rsvp_gpi dpi;
80 u8 protocol;
81 u8 tunnelid;
82 /* 16 (src,sport) hash slots, and one wildcard source slot */
83 struct rsvp_filter *ht[16 + 1];
84};
85
86
87struct rsvp_filter {
88 struct rsvp_filter *next;
89 __be32 src[RSVP_DST_LEN];
90 struct tc_rsvp_gpi spi;
91 u8 tunnelhdr;
92
93 struct tcf_result res;
94 struct tcf_exts exts;
95
96 u32 handle;
97 struct rsvp_session *sess;
98};
99
100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
101{
102 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
104 h ^= h>>16;
105 h ^= h>>8;
106 return (h ^ protocol ^ tunnelid) & 0xFF;
107}
108
109static inline unsigned int hash_src(__be32 *src)
110{
111 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
113 h ^= h>>16;
114 h ^= h>>8;
115 h ^= h>>4;
116 return h & 0xF;
117}
118
119static struct tcf_ext_map rsvp_ext_map = {
120 .police = TCA_RSVP_POLICE,
121 .action = TCA_RSVP_ACT
122};
123
/*
 * Run the extensions of the current filter and act on the verdict.
 *
 * Expands in a scope that must provide 'skb', 'f' and 'res', and must be
 * used inside a loop:
 *   r < 0  -> 'continue' the enclosing loop (try the next filter),
 *   r > 0  -> return r from the enclosing function,
 *   r == 0 -> fall through to the statement after the macro.
 *
 * The body is a bare block rather than do { } while (0) on purpose: inside
 * a do/while wrapper the 'continue' would bind to that wrapper instead of
 * the caller's loop.
 */
124#define RSVP_APPLY_RESULT() \
125{ \
126 int r = tcf_exts_exec(skb, &f->exts, res); \
127 if (r < 0) \
128 continue; \
129 else if (r > 0) \
130 return r; \
131}
132
/*
 * Classify a packet against the two-level session/flow tables.
 *
 * Returns -1 when the network header cannot be pulled, when the IPv4
 * packet is a fragment, or when no session/flow matches; returns 0 with
 * *res filled in on a match whose filter has tunnelhdr == 0; returns the
 * positive value produced by tcf_exts_exec() when the extensions yield one.
 *
 * When the matching filter has a non-zero tunnelhdr, its classid is used
 * as the tunnel id, nhptr is moved forward inside the packet and the
 * lookup is restarted.
 *
 * NOTE(review): the DPI/SPI comparisons read a u32 at xprt + offset, where
 * the offset comes from filter configuration; no bounds or alignment check
 * is visible in this function, and the restarted lookup does not re-run
 * pskb_network_may_pull() - confirm that callers/config validation make
 * these accesses safe.
 */
133static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
134 struct tcf_result *res)
135{
136 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
137 struct rsvp_session *s;
138 struct rsvp_filter *f;
139 unsigned int h1, h2;
140 __be32 *dst, *src;
141 u8 protocol;
142 u8 tunnelid = 0;
143 u8 *xprt;
144#if RSVP_DST_LEN == 4
145 struct ipv6hdr *nhptr;
146
147 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
148 return -1;
149 nhptr = ipv6_hdr(skb);
150#else
151 struct iphdr *nhptr;
152
153 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
154 return -1;
155 nhptr = ip_hdr(skb);
156#endif
157
/* Re-entered with an updated nhptr/tunnelid after a tunnel filter matched. */
158restart:
159
160#if RSVP_DST_LEN == 4
161 src = &nhptr->saddr.s6_addr32[0];
162 dst = &nhptr->daddr.s6_addr32[0];
163 protocol = nhptr->nexthdr;
164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165#else
166 src = &nhptr->saddr;
167 dst = &nhptr->daddr;
168 protocol = nhptr->protocol;
169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
170 if (ip_is_fragment(nhptr))
171 return -1;
172#endif
173
174 h1 = hash_dst(dst, protocol, tunnelid);
175 h2 = hash_src(src);
176
/* Level 1: find the session matching dst, protocol, DPI and tunnel id. */
177 for (s = sht[h1]; s; s = s->next) {
178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179 protocol == s->protocol &&
180 !(s->dpi.mask &
181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182#if RSVP_DST_LEN == 4
183 dst[0] == s->dst[0] &&
184 dst[1] == s->dst[1] &&
185 dst[2] == s->dst[2] &&
186#endif
187 tunnelid == s->tunnelid) {
188
/* Level 2: exact-source flows in the slot selected by hash_src(). */
189 for (f = s->ht[h2]; f; f = f->next) {
190 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
191 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
192#if RSVP_DST_LEN == 4
193 &&
194 src[0] == f->src[0] &&
195 src[1] == f->src[1] &&
196 src[2] == f->src[2]
197#endif
198 ) {
199 *res = f->res;
200 RSVP_APPLY_RESULT();
201
/* Also the jump target of the wildcard loop below; 'f' is the matched filter. */
202matched:
203 if (f->tunnelhdr == 0)
204 return 0;
205
206 tunnelid = f->res.classid;
207 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
208 goto restart;
209 }
210 }
211
212 /* And wildcard bucket... */
213 for (f = s->ht[16]; f; f = f->next) {
214 *res = f->res;
215 RSVP_APPLY_RESULT();
216 goto matched;
217 }
/* Only the first matching session is consulted; no fallback to later ones. */
218 return -1;
219 }
220 }
221 return -1;
222}
223
224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
225{
226 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
227 struct rsvp_session *s;
228 struct rsvp_filter *f;
229 unsigned int h1 = handle & 0xFF;
230 unsigned int h2 = (handle >> 8) & 0xFF;
231
232 if (h2 > 16)
233 return 0;
234
235 for (s = sht[h1]; s; s = s->next) {
236 for (f = s->ht[h2]; f; f = f->next) {
237 if (f->handle == handle)
238 return (unsigned long)f;
239 }
240 }
241 return 0;
242}
243
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
}
247
248static int rsvp_init(struct tcf_proto *tp)
249{
250 struct rsvp_head *data;
251
252 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
253 if (data) {
254 tp->root = data;
255 return 0;
256 }
257 return -ENOBUFS;
258}
259
260static void
261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
262{
263 tcf_unbind_filter(tp, &f->res);
264 tcf_exts_destroy(tp, &f->exts);
265 kfree(f);
266}
267
268static void rsvp_destroy(struct tcf_proto *tp)
269{
270 struct rsvp_head *data = xchg(&tp->root, NULL);
271 struct rsvp_session **sht;
272 int h1, h2;
273
274 if (data == NULL)
275 return;
276
277 sht = data->ht;
278
279 for (h1 = 0; h1 < 256; h1++) {
280 struct rsvp_session *s;
281
282 while ((s = sht[h1]) != NULL) {
283 sht[h1] = s->next;
284
285 for (h2 = 0; h2 <= 16; h2++) {
286 struct rsvp_filter *f;
287
288 while ((f = s->ht[h2]) != NULL) {
289 s->ht[h2] = f->next;
290 rsvp_delete_filter(tp, f);
291 }
292 }
293 kfree(s);
294 }
295 }
296 kfree(data);
297}
298
299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
300{
301 struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
302 unsigned int h = f->handle;
303 struct rsvp_session **sp;
304 struct rsvp_session *s = f->sess;
305 int i;
306
307 for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
308 if (*fp == f) {
309 tcf_tree_lock(tp);
310 *fp = f->next;
311 tcf_tree_unlock(tp);
312 rsvp_delete_filter(tp, f);
313
314 /* Strip tree */
315
316 for (i = 0; i <= 16; i++)
317 if (s->ht[i])
318 return 0;
319
320 /* OK, session has no flows */
321 for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
322 *sp; sp = &(*sp)->next) {
323 if (*sp == s) {
324 tcf_tree_lock(tp);
325 *sp = s->next;
326 tcf_tree_unlock(tp);
327
328 kfree(s);
329 return 0;
330 }
331 }
332
333 return 0;
334 }
335 }
336 return 0;
337}
338
339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
340{
341 struct rsvp_head *data = tp->root;
342 int i = 0xFFFF;
343
344 while (i-- > 0) {
345 u32 h;
346
347 if ((data->hgenerator += 0x10000) == 0)
348 data->hgenerator = 0x10000;
349 h = data->hgenerator|salt;
350 if (rsvp_get(tp, h) == 0)
351 return h;
352 }
353 return 0;
354}
355
356static int tunnel_bts(struct rsvp_head *data)
357{
358 int n = data->tgenerator >> 5;
359 u32 b = 1 << (data->tgenerator & 0x1F);
360
361 if (data->tmap[n] & b)
362 return 0;
363 data->tmap[n] |= b;
364 return 1;
365}
366
367static void tunnel_recycle(struct rsvp_head *data)
368{
369 struct rsvp_session **sht = data->ht;
370 u32 tmap[256/32];
371 int h1, h2;
372
373 memset(tmap, 0, sizeof(tmap));
374
375 for (h1 = 0; h1 < 256; h1++) {
376 struct rsvp_session *s;
377 for (s = sht[h1]; s; s = s->next) {
378 for (h2 = 0; h2 <= 16; h2++) {
379 struct rsvp_filter *f;
380
381 for (f = s->ht[h2]; f; f = f->next) {
382 if (f->tunnelhdr == 0)
383 continue;
384 data->tgenerator = f->res.classid;
385 tunnel_bts(data);
386 }
387 }
388 }
389 }
390
391 memcpy(data->tmap, tmap, sizeof(tmap));
392}
393
394static u32 gen_tunnel(struct rsvp_head *data)
395{
396 int i, k;
397
398 for (k = 0; k < 2; k++) {
399 for (i = 255; i > 0; i--) {
400 if (++data->tgenerator == 0)
401 data->tgenerator = 1;
402 if (tunnel_bts(data))
403 return data->tgenerator;
404 }
405 tunnel_recycle(data);
406 }
407 return 0;
408}
409
410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
412 [TCA_RSVP_DST] = { .type = NLA_BINARY,
413 .len = RSVP_DST_LEN * sizeof(u32) },
414 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
415 .len = RSVP_DST_LEN * sizeof(u32) },
416 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
417};
418
419static int rsvp_change(struct tcf_proto *tp, unsigned long base,
420 u32 handle,
421 struct nlattr **tca,
422 unsigned long *arg)
423{
424 struct rsvp_head *data = tp->root;
425 struct rsvp_filter *f, **fp;
426 struct rsvp_session *s, **sp;
427 struct tc_rsvp_pinfo *pinfo = NULL;
428 struct nlattr *opt = tca[TCA_OPTIONS];
429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e;
431 unsigned int h1, h2;
432 __be32 *dst;
433 int err;
434
435 if (opt == NULL)
436 return handle ? -EINVAL : 0;
437
438 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
439 if (err < 0)
440 return err;
441
442 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
443 if (err < 0)
444 return err;
445
446 f = (struct rsvp_filter *)*arg;
447 if (f) {
448 /* Node exists: adjust only classid */
449
450 if (f->handle != handle && handle)
451 goto errout2;
452 if (tb[TCA_RSVP_CLASSID]) {
453 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
454 tcf_bind_filter(tp, &f->res, base);
455 }
456
457 tcf_exts_change(tp, &f->exts, &e);
458 return 0;
459 }
460
461 /* Now more serious part... */
462 err = -EINVAL;
463 if (handle)
464 goto errout2;
465 if (tb[TCA_RSVP_DST] == NULL)
466 goto errout2;
467
468 err = -ENOBUFS;
469 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
470 if (f == NULL)
471 goto errout2;
472
473 h2 = 16;
474 if (tb[TCA_RSVP_SRC]) {
475 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
476 h2 = hash_src(f->src);
477 }
478 if (tb[TCA_RSVP_PINFO]) {
479 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
480 f->spi = pinfo->spi;
481 f->tunnelhdr = pinfo->tunnelhdr;
482 }
483 if (tb[TCA_RSVP_CLASSID])
484 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
485
486 dst = nla_data(tb[TCA_RSVP_DST]);
487 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
488
489 err = -ENOMEM;
490 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
491 goto errout;
492
493 if (f->tunnelhdr) {
494 err = -EINVAL;
495 if (f->res.classid > 255)
496 goto errout;
497
498 err = -ENOMEM;
499 if (f->res.classid == 0 &&
500 (f->res.classid = gen_tunnel(data)) == 0)
501 goto errout;
502 }
503
504 for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
505 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
506 pinfo && pinfo->protocol == s->protocol &&
507 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
508#if RSVP_DST_LEN == 4
509 dst[0] == s->dst[0] &&
510 dst[1] == s->dst[1] &&
511 dst[2] == s->dst[2] &&
512#endif
513 pinfo->tunnelid == s->tunnelid) {
514
515insert:
516 /* OK, we found appropriate session */
517
518 fp = &s->ht[h2];
519
520 f->sess = s;
521 if (f->tunnelhdr == 0)
522 tcf_bind_filter(tp, &f->res, base);
523
524 tcf_exts_change(tp, &f->exts, &e);
525
526 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
527 if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
528 break;
529 f->next = *fp;
530 wmb();
531 *fp = f;
532
533 *arg = (unsigned long)f;
534 return 0;
535 }
536 }
537
538 /* No session found. Create new one. */
539
540 err = -ENOBUFS;
541 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
542 if (s == NULL)
543 goto errout;
544 memcpy(s->dst, dst, sizeof(s->dst));
545
546 if (pinfo) {
547 s->dpi = pinfo->dpi;
548 s->protocol = pinfo->protocol;
549 s->tunnelid = pinfo->tunnelid;
550 }
551 for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
552 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
553 break;
554 }
555 s->next = *sp;
556 wmb();
557 *sp = s;
558
559 goto insert;
560
561errout:
562 kfree(f);
563errout2:
564 tcf_exts_destroy(tp, &e);
565 return err;
566}
567
568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
569{
570 struct rsvp_head *head = tp->root;
571 unsigned int h, h1;
572
573 if (arg->stop)
574 return;
575
576 for (h = 0; h < 256; h++) {
577 struct rsvp_session *s;
578
579 for (s = head->ht[h]; s; s = s->next) {
580 for (h1 = 0; h1 <= 16; h1++) {
581 struct rsvp_filter *f;
582
583 for (f = s->ht[h1]; f; f = f->next) {
584 if (arg->count < arg->skip) {
585 arg->count++;
586 continue;
587 }
588 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
589 arg->stop = 1;
590 return;
591 }
592 arg->count++;
593 }
594 }
595 }
596 }
597}
598
599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
600 struct sk_buff *skb, struct tcmsg *t)
601{
602 struct rsvp_filter *f = (struct rsvp_filter *)fh;
603 struct rsvp_session *s;
604 unsigned char *b = skb_tail_pointer(skb);
605 struct nlattr *nest;
606 struct tc_rsvp_pinfo pinfo;
607
608 if (f == NULL)
609 return skb->len;
610 s = f->sess;
611
612 t->tcm_handle = f->handle;
613
614 nest = nla_nest_start(skb, TCA_OPTIONS);
615 if (nest == NULL)
616 goto nla_put_failure;
617
618 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
619 goto nla_put_failure;
620 pinfo.dpi = s->dpi;
621 pinfo.spi = f->spi;
622 pinfo.protocol = s->protocol;
623 pinfo.tunnelid = s->tunnelid;
624 pinfo.tunnelhdr = f->tunnelhdr;
625 pinfo.pad = 0;
626 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
627 goto nla_put_failure;
628 if (f->res.classid &&
629 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
630 goto nla_put_failure;
631 if (((f->handle >> 8) & 0xFF) != 16 &&
632 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
633 goto nla_put_failure;
634
635 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
636 goto nla_put_failure;
637
638 nla_nest_end(skb, nest);
639
640 if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
641 goto nla_put_failure;
642 return skb->len;
643
644nla_put_failure:
645 nlmsg_trim(skb, b);
646 return -1;
647}
648
649static struct tcf_proto_ops RSVP_OPS __read_mostly = {
650 .kind = RSVP_ID,
651 .classify = rsvp_classify,
652 .init = rsvp_init,
653 .destroy = rsvp_destroy,
654 .get = rsvp_get,
655 .put = rsvp_put,
656 .change = rsvp_change,
657 .delete = rsvp_delete,
658 .walk = rsvp_walk,
659 .dump = rsvp_dump,
660 .owner = THIS_MODULE,
661};
662
663static int __init init_rsvp(void)
664{
665 return register_tcf_proto_ops(&RSVP_OPS);
666}
667
668static void __exit exit_rsvp(void)
669{
670 unregister_tcf_proto_ops(&RSVP_OPS);
671}
672
673module_init(init_rsvp)
674module_exit(exit_rsvp)
1/*
2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
12/*
13 Compared to the general packet classification problem,
14 RSVP needs only several relatively simple rules:
15
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
21
22 IMPLEMENTATION.
23
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
28
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
34
35
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
38
39
40 NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
43 should be wildcard.
44
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
57
58
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
63
64
65 Well, as result, despite its simplicity, we get a pretty
66 powerful classification engine. */
67
68
69struct rsvp_head {
70 u32 tmap[256/32];
71 u32 hgenerator;
72 u8 tgenerator;
73 struct rsvp_session __rcu *ht[256];
74 struct rcu_head rcu;
75};
76
77struct rsvp_session {
78 struct rsvp_session __rcu *next;
79 __be32 dst[RSVP_DST_LEN];
80 struct tc_rsvp_gpi dpi;
81 u8 protocol;
82 u8 tunnelid;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
85 struct rcu_head rcu;
86};
87
88
89struct rsvp_filter {
90 struct rsvp_filter __rcu *next;
91 __be32 src[RSVP_DST_LEN];
92 struct tc_rsvp_gpi spi;
93 u8 tunnelhdr;
94
95 struct tcf_result res;
96 struct tcf_exts exts;
97
98 u32 handle;
99 struct rsvp_session *sess;
100 struct rcu_head rcu;
101};
102
103static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{
105 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
106
107 h ^= h>>16;
108 h ^= h>>8;
109 return (h ^ protocol ^ tunnelid) & 0xFF;
110}
111
112static inline unsigned int hash_src(__be32 *src)
113{
114 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
115
116 h ^= h>>16;
117 h ^= h>>8;
118 h ^= h>>4;
119 return h & 0xF;
120}
121
/*
 * Run the extensions of the current filter and act on the verdict.
 *
 * Expands in a scope that must provide 'skb', 'f' and 'res', and must be
 * used inside a loop:
 *   r < 0  -> 'continue' the enclosing loop (try the next filter),
 *   r > 0  -> return r from the enclosing function,
 *   r == 0 -> fall through to the statement after the macro.
 *
 * The body is a bare block rather than do { } while (0) on purpose: inside
 * a do/while wrapper the 'continue' would bind to that wrapper instead of
 * the caller's loop.
 */
122#define RSVP_APPLY_RESULT() \
123{ \
124 int r = tcf_exts_exec(skb, &f->exts, res); \
125 if (r < 0) \
126 continue; \
127 else if (r > 0) \
128 return r; \
129}
130
/*
 * Classify a packet against the two-level session/flow tables, walking
 * all lists with rcu_dereference_bh().
 *
 * Returns -1 when the network header cannot be pulled, when the root
 * pointer is NULL, when the IPv4 packet is a fragment, or when no
 * session/flow matches; returns 0 with *res filled in on a match whose
 * filter has tunnelhdr == 0; returns the positive value produced by
 * tcf_exts_exec() when the extensions yield one.
 *
 * When the matching filter has a non-zero tunnelhdr, its classid is used
 * as the tunnel id, nhptr is moved forward inside the packet and the
 * lookup is restarted.
 *
 * NOTE(review): the DPI/SPI comparisons read a u32 at xprt + offset, where
 * the offset comes from filter configuration; no bounds or alignment check
 * is visible in this function, and the restarted lookup does not re-run
 * pskb_network_may_pull() - confirm that callers/config validation make
 * these accesses safe.
 */
131static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132 struct tcf_result *res)
133{
134 struct rsvp_head *head = rcu_dereference_bh(tp->root);
135 struct rsvp_session *s;
136 struct rsvp_filter *f;
137 unsigned int h1, h2;
138 __be32 *dst, *src;
139 u8 protocol;
140 u8 tunnelid = 0;
141 u8 *xprt;
142#if RSVP_DST_LEN == 4
143 struct ipv6hdr *nhptr;
144
145 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
146 return -1;
147 nhptr = ipv6_hdr(skb);
148#else
149 struct iphdr *nhptr;
150
151 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
152 return -1;
153 nhptr = ip_hdr(skb);
154#endif
/* rsvp_destroy() sets tp->root to NULL, so the root may be absent here. */
155 if (unlikely(!head))
156 return -1;
/* Re-entered with an updated nhptr/tunnelid after a tunnel filter matched. */
157restart:
158
159#if RSVP_DST_LEN == 4
160 src = &nhptr->saddr.s6_addr32[0];
161 dst = &nhptr->daddr.s6_addr32[0];
162 protocol = nhptr->nexthdr;
163 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
164#else
165 src = &nhptr->saddr;
166 dst = &nhptr->daddr;
167 protocol = nhptr->protocol;
168 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
169 if (ip_is_fragment(nhptr))
170 return -1;
171#endif
172
173 h1 = hash_dst(dst, protocol, tunnelid);
174 h2 = hash_src(src);
175
/* Level 1: find the session matching dst, protocol, DPI and tunnel id. */
176 for (s = rcu_dereference_bh(head->ht[h1]); s;
177 s = rcu_dereference_bh(s->next)) {
178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179 protocol == s->protocol &&
180 !(s->dpi.mask &
181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182#if RSVP_DST_LEN == 4
183 dst[0] == s->dst[0] &&
184 dst[1] == s->dst[1] &&
185 dst[2] == s->dst[2] &&
186#endif
187 tunnelid == s->tunnelid) {
188
/* Level 2: exact-source flows in the slot selected by hash_src(). */
189 for (f = rcu_dereference_bh(s->ht[h2]); f;
190 f = rcu_dereference_bh(f->next)) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193#if RSVP_DST_LEN == 4
194 &&
195 src[0] == f->src[0] &&
196 src[1] == f->src[1] &&
197 src[2] == f->src[2]
198#endif
199 ) {
200 *res = f->res;
201 RSVP_APPLY_RESULT();
202
/* Also the jump target of the wildcard loop below; 'f' is the matched filter. */
203matched:
204 if (f->tunnelhdr == 0)
205 return 0;
206
207 tunnelid = f->res.classid;
208 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart;
210 }
211 }
212
213 /* And wildcard bucket... */
214 for (f = rcu_dereference_bh(s->ht[16]); f;
215 f = rcu_dereference_bh(f->next)) {
216 *res = f->res;
217 RSVP_APPLY_RESULT();
218 goto matched;
219 }
/* Only the first matching session is consulted; no fallback to later ones. */
220 return -1;
221 }
222 }
223 return -1;
224}
225
226static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
227{
228 struct rsvp_head *head = rtnl_dereference(tp->root);
229 struct rsvp_session *s;
230 struct rsvp_filter __rcu **ins;
231 struct rsvp_filter *pins;
232 unsigned int h1 = h & 0xFF;
233 unsigned int h2 = (h >> 8) & 0xFF;
234
235 for (s = rtnl_dereference(head->ht[h1]); s;
236 s = rtnl_dereference(s->next)) {
237 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
238 ins = &pins->next, pins = rtnl_dereference(*ins)) {
239 if (pins->handle == h) {
240 RCU_INIT_POINTER(n->next, pins->next);
241 rcu_assign_pointer(*ins, n);
242 return;
243 }
244 }
245 }
246
247 /* Something went wrong if we are trying to replace a non-existant
248 * node. Mind as well halt instead of silently failing.
249 */
250 BUG_ON(1);
251}
252
253static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
254{
255 struct rsvp_head *head = rtnl_dereference(tp->root);
256 struct rsvp_session *s;
257 struct rsvp_filter *f;
258 unsigned int h1 = handle & 0xFF;
259 unsigned int h2 = (handle >> 8) & 0xFF;
260
261 if (h2 > 16)
262 return 0;
263
264 for (s = rtnl_dereference(head->ht[h1]); s;
265 s = rtnl_dereference(s->next)) {
266 for (f = rtnl_dereference(s->ht[h2]); f;
267 f = rtnl_dereference(f->next)) {
268 if (f->handle == handle)
269 return (unsigned long)f;
270 }
271 }
272 return 0;
273}
274
275static int rsvp_init(struct tcf_proto *tp)
276{
277 struct rsvp_head *data;
278
279 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
280 if (data) {
281 rcu_assign_pointer(tp->root, data);
282 return 0;
283 }
284 return -ENOBUFS;
285}
286
287static void rsvp_delete_filter_rcu(struct rcu_head *head)
288{
289 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
290
291 tcf_exts_destroy(&f->exts);
292 kfree(f);
293}
294
295static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
296{
297 tcf_unbind_filter(tp, &f->res);
298 /* all classifiers are required to call tcf_exts_destroy() after rcu
299 * grace period, since converted-to-rcu actions are relying on that
300 * in cleanup() callback
301 */
302 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
303}
304
305static bool rsvp_destroy(struct tcf_proto *tp, bool force)
306{
307 struct rsvp_head *data = rtnl_dereference(tp->root);
308 int h1, h2;
309
310 if (data == NULL)
311 return true;
312
313 if (!force) {
314 for (h1 = 0; h1 < 256; h1++) {
315 if (rcu_access_pointer(data->ht[h1]))
316 return false;
317 }
318 }
319
320 RCU_INIT_POINTER(tp->root, NULL);
321
322 for (h1 = 0; h1 < 256; h1++) {
323 struct rsvp_session *s;
324
325 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
326 RCU_INIT_POINTER(data->ht[h1], s->next);
327
328 for (h2 = 0; h2 <= 16; h2++) {
329 struct rsvp_filter *f;
330
331 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
332 rcu_assign_pointer(s->ht[h2], f->next);
333 rsvp_delete_filter(tp, f);
334 }
335 }
336 kfree_rcu(s, rcu);
337 }
338 }
339 kfree_rcu(data, rcu);
340 return true;
341}
342
343static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
344{
345 struct rsvp_head *head = rtnl_dereference(tp->root);
346 struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
347 struct rsvp_filter __rcu **fp;
348 unsigned int h = f->handle;
349 struct rsvp_session __rcu **sp;
350 struct rsvp_session *nsp, *s = f->sess;
351 int i;
352
353 fp = &s->ht[(h >> 8) & 0xFF];
354 for (nfp = rtnl_dereference(*fp); nfp;
355 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
356 if (nfp == f) {
357 RCU_INIT_POINTER(*fp, f->next);
358 rsvp_delete_filter(tp, f);
359
360 /* Strip tree */
361
362 for (i = 0; i <= 16; i++)
363 if (s->ht[i])
364 return 0;
365
366 /* OK, session has no flows */
367 sp = &head->ht[h & 0xFF];
368 for (nsp = rtnl_dereference(*sp); nsp;
369 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
370 if (nsp == s) {
371 RCU_INIT_POINTER(*sp, s->next);
372 kfree_rcu(s, rcu);
373 return 0;
374 }
375 }
376
377 return 0;
378 }
379 }
380 return 0;
381}
382
383static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
384{
385 struct rsvp_head *data = rtnl_dereference(tp->root);
386 int i = 0xFFFF;
387
388 while (i-- > 0) {
389 u32 h;
390
391 if ((data->hgenerator += 0x10000) == 0)
392 data->hgenerator = 0x10000;
393 h = data->hgenerator|salt;
394 if (rsvp_get(tp, h) == 0)
395 return h;
396 }
397 return 0;
398}
399
400static int tunnel_bts(struct rsvp_head *data)
401{
402 int n = data->tgenerator >> 5;
403 u32 b = 1 << (data->tgenerator & 0x1F);
404
405 if (data->tmap[n] & b)
406 return 0;
407 data->tmap[n] |= b;
408 return 1;
409}
410
411static void tunnel_recycle(struct rsvp_head *data)
412{
413 struct rsvp_session __rcu **sht = data->ht;
414 u32 tmap[256/32];
415 int h1, h2;
416
417 memset(tmap, 0, sizeof(tmap));
418
419 for (h1 = 0; h1 < 256; h1++) {
420 struct rsvp_session *s;
421 for (s = rtnl_dereference(sht[h1]); s;
422 s = rtnl_dereference(s->next)) {
423 for (h2 = 0; h2 <= 16; h2++) {
424 struct rsvp_filter *f;
425
426 for (f = rtnl_dereference(s->ht[h2]); f;
427 f = rtnl_dereference(f->next)) {
428 if (f->tunnelhdr == 0)
429 continue;
430 data->tgenerator = f->res.classid;
431 tunnel_bts(data);
432 }
433 }
434 }
435 }
436
437 memcpy(data->tmap, tmap, sizeof(tmap));
438}
439
440static u32 gen_tunnel(struct rsvp_head *data)
441{
442 int i, k;
443
444 for (k = 0; k < 2; k++) {
445 for (i = 255; i > 0; i--) {
446 if (++data->tgenerator == 0)
447 data->tgenerator = 1;
448 if (tunnel_bts(data))
449 return data->tgenerator;
450 }
451 tunnel_recycle(data);
452 }
453 return 0;
454}
455
456static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
457 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
458 [TCA_RSVP_DST] = { .type = NLA_BINARY,
459 .len = RSVP_DST_LEN * sizeof(u32) },
460 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
461 .len = RSVP_DST_LEN * sizeof(u32) },
462 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
463};
464
465static int rsvp_change(struct net *net, struct sk_buff *in_skb,
466 struct tcf_proto *tp, unsigned long base,
467 u32 handle,
468 struct nlattr **tca,
469 unsigned long *arg, bool ovr)
470{
471 struct rsvp_head *data = rtnl_dereference(tp->root);
472 struct rsvp_filter *f, *nfp;
473 struct rsvp_filter __rcu **fp;
474 struct rsvp_session *nsp, *s;
475 struct rsvp_session __rcu **sp;
476 struct tc_rsvp_pinfo *pinfo = NULL;
477 struct nlattr *opt = tca[TCA_OPTIONS];
478 struct nlattr *tb[TCA_RSVP_MAX + 1];
479 struct tcf_exts e;
480 unsigned int h1, h2;
481 __be32 *dst;
482 int err;
483
484 if (opt == NULL)
485 return handle ? -EINVAL : 0;
486
487 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
488 if (err < 0)
489 return err;
490
491 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
492 if (err < 0)
493 return err;
494 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
495 if (err < 0)
496 goto errout2;
497
498 f = (struct rsvp_filter *)*arg;
499 if (f) {
500 /* Node exists: adjust only classid */
501 struct rsvp_filter *n;
502
503 if (f->handle != handle && handle)
504 goto errout2;
505
506 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
507 if (!n) {
508 err = -ENOMEM;
509 goto errout2;
510 }
511
512 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
513 if (err < 0) {
514 kfree(n);
515 goto errout2;
516 }
517
518 if (tb[TCA_RSVP_CLASSID]) {
519 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
520 tcf_bind_filter(tp, &n->res, base);
521 }
522
523 tcf_exts_change(tp, &n->exts, &e);
524 rsvp_replace(tp, n, handle);
525 return 0;
526 }
527
528 /* Now more serious part... */
529 err = -EINVAL;
530 if (handle)
531 goto errout2;
532 if (tb[TCA_RSVP_DST] == NULL)
533 goto errout2;
534
535 err = -ENOBUFS;
536 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
537 if (f == NULL)
538 goto errout2;
539
540 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
541 if (err < 0)
542 goto errout;
543 h2 = 16;
544 if (tb[TCA_RSVP_SRC]) {
545 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
546 h2 = hash_src(f->src);
547 }
548 if (tb[TCA_RSVP_PINFO]) {
549 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
550 f->spi = pinfo->spi;
551 f->tunnelhdr = pinfo->tunnelhdr;
552 }
553 if (tb[TCA_RSVP_CLASSID])
554 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
555
556 dst = nla_data(tb[TCA_RSVP_DST]);
557 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
558
559 err = -ENOMEM;
560 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
561 goto errout;
562
563 if (f->tunnelhdr) {
564 err = -EINVAL;
565 if (f->res.classid > 255)
566 goto errout;
567
568 err = -ENOMEM;
569 if (f->res.classid == 0 &&
570 (f->res.classid = gen_tunnel(data)) == 0)
571 goto errout;
572 }
573
574 for (sp = &data->ht[h1];
575 (s = rtnl_dereference(*sp)) != NULL;
576 sp = &s->next) {
577 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
578 pinfo && pinfo->protocol == s->protocol &&
579 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
580#if RSVP_DST_LEN == 4
581 dst[0] == s->dst[0] &&
582 dst[1] == s->dst[1] &&
583 dst[2] == s->dst[2] &&
584#endif
585 pinfo->tunnelid == s->tunnelid) {
586
587insert:
588 /* OK, we found appropriate session */
589
590 fp = &s->ht[h2];
591
592 f->sess = s;
593 if (f->tunnelhdr == 0)
594 tcf_bind_filter(tp, &f->res, base);
595
596 tcf_exts_change(tp, &f->exts, &e);
597
598 fp = &s->ht[h2];
599 for (nfp = rtnl_dereference(*fp); nfp;
600 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
601 __u32 mask = nfp->spi.mask & f->spi.mask;
602
603 if (mask != f->spi.mask)
604 break;
605 }
606 RCU_INIT_POINTER(f->next, nfp);
607 rcu_assign_pointer(*fp, f);
608
609 *arg = (unsigned long)f;
610 return 0;
611 }
612 }
613
614 /* No session found. Create new one. */
615
616 err = -ENOBUFS;
617 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
618 if (s == NULL)
619 goto errout;
620 memcpy(s->dst, dst, sizeof(s->dst));
621
622 if (pinfo) {
623 s->dpi = pinfo->dpi;
624 s->protocol = pinfo->protocol;
625 s->tunnelid = pinfo->tunnelid;
626 }
627 sp = &data->ht[h1];
628 for (nsp = rtnl_dereference(*sp); nsp;
629 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
630 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
631 break;
632 }
633 RCU_INIT_POINTER(s->next, nsp);
634 rcu_assign_pointer(*sp, s);
635
636 goto insert;
637
638errout:
639 tcf_exts_destroy(&f->exts);
640 kfree(f);
641errout2:
642 tcf_exts_destroy(&e);
643 return err;
644}
645
646static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
647{
648 struct rsvp_head *head = rtnl_dereference(tp->root);
649 unsigned int h, h1;
650
651 if (arg->stop)
652 return;
653
654 for (h = 0; h < 256; h++) {
655 struct rsvp_session *s;
656
657 for (s = rtnl_dereference(head->ht[h]); s;
658 s = rtnl_dereference(s->next)) {
659 for (h1 = 0; h1 <= 16; h1++) {
660 struct rsvp_filter *f;
661
662 for (f = rtnl_dereference(s->ht[h1]); f;
663 f = rtnl_dereference(f->next)) {
664 if (arg->count < arg->skip) {
665 arg->count++;
666 continue;
667 }
668 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
669 arg->stop = 1;
670 return;
671 }
672 arg->count++;
673 }
674 }
675 }
676 }
677}
678
679static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
680 struct sk_buff *skb, struct tcmsg *t)
681{
682 struct rsvp_filter *f = (struct rsvp_filter *)fh;
683 struct rsvp_session *s;
684 struct nlattr *nest;
685 struct tc_rsvp_pinfo pinfo;
686
687 if (f == NULL)
688 return skb->len;
689 s = f->sess;
690
691 t->tcm_handle = f->handle;
692
693 nest = nla_nest_start(skb, TCA_OPTIONS);
694 if (nest == NULL)
695 goto nla_put_failure;
696
697 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
698 goto nla_put_failure;
699 pinfo.dpi = s->dpi;
700 pinfo.spi = f->spi;
701 pinfo.protocol = s->protocol;
702 pinfo.tunnelid = s->tunnelid;
703 pinfo.tunnelhdr = f->tunnelhdr;
704 pinfo.pad = 0;
705 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
706 goto nla_put_failure;
707 if (f->res.classid &&
708 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
709 goto nla_put_failure;
710 if (((f->handle >> 8) & 0xFF) != 16 &&
711 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
712 goto nla_put_failure;
713
714 if (tcf_exts_dump(skb, &f->exts) < 0)
715 goto nla_put_failure;
716
717 nla_nest_end(skb, nest);
718
719 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
720 goto nla_put_failure;
721 return skb->len;
722
723nla_put_failure:
724 nla_nest_cancel(skb, nest);
725 return -1;
726}
727
728static struct tcf_proto_ops RSVP_OPS __read_mostly = {
729 .kind = RSVP_ID,
730 .classify = rsvp_classify,
731 .init = rsvp_init,
732 .destroy = rsvp_destroy,
733 .get = rsvp_get,
734 .change = rsvp_change,
735 .delete = rsvp_delete,
736 .walk = rsvp_walk,
737 .dump = rsvp_dump,
738 .owner = THIS_MODULE,
739};
740
741static int __init init_rsvp(void)
742{
743 return register_tcf_proto_ops(&RSVP_OPS);
744}
745
746static void __exit exit_rsvp(void)
747{
748 unregister_tcf_proto_ops(&RSVP_OPS);
749}
750
751module_init(init_rsvp)
752module_exit(exit_rsvp)