Linux Audio

Check our new training course

Embedded Linux training

Mar 10-20, 2025, special US time zones
Register
Loading...
Note: File does not exist in v3.1.
  1/*
  2 * Berkeley Packet Filter based traffic classifier
  3 *
  4 * Might be used to classify traffic through flexible, user-defined and
  5 * possibly JIT-ed BPF filters for traffic control as an alternative to
  6 * ematches.
  7 *
  8 * (C) 2013 Daniel Borkmann <dborkman@redhat.com>
  9 *
 10 * This program is free software; you can redistribute it and/or modify
 11 * it under the terms of the GNU General Public License version 2 as
 12 * published by the Free Software Foundation.
 13 */
 14
 15#include <linux/module.h>
 16#include <linux/types.h>
 17#include <linux/skbuff.h>
 18#include <linux/filter.h>
 19#include <linux/bpf.h>
 20
 21#include <net/rtnetlink.h>
 22#include <net/pkt_cls.h>
 23#include <net/sock.h>
 24
 25MODULE_LICENSE("GPL");
 26MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
 27MODULE_DESCRIPTION("TC BPF based classifier");
 28
 29#define CLS_BPF_NAME_LEN	256
 30#define CLS_BPF_SUPPORTED_GEN_FLAGS		\
 31	(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
 32
 33struct cls_bpf_head {
 34	struct list_head plist;
 35	u32 hgen;
 36	struct rcu_head rcu;
 37};
 38
 39struct cls_bpf_prog {
 40	struct bpf_prog *filter;
 41	struct list_head link;
 42	struct tcf_result res;
 43	bool exts_integrated;
 44	bool offloaded;
 45	u32 gen_flags;
 46	struct tcf_exts exts;
 47	u32 handle;
 48	u16 bpf_num_ops;
 49	struct sock_filter *bpf_ops;
 50	const char *bpf_name;
 51	struct tcf_proto *tp;
 52	struct rcu_head rcu;
 53};
 54
 55static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 56	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
 57	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
 58	[TCA_BPF_FLAGS_GEN]	= { .type = NLA_U32 },
 59	[TCA_BPF_FD]		= { .type = NLA_U32 },
 60	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING,
 61				    .len = CLS_BPF_NAME_LEN },
 62	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
 63	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
 64				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 65};
 66
 67static int cls_bpf_exec_opcode(int code)
 68{
 69	switch (code) {
 70	case TC_ACT_OK:
 71	case TC_ACT_SHOT:
 72	case TC_ACT_STOLEN:
 73	case TC_ACT_REDIRECT:
 74	case TC_ACT_UNSPEC:
 75		return code;
 76	default:
 77		return TC_ACT_UNSPEC;
 78	}
 79}
 80
 81static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 82			    struct tcf_result *res)
 83{
 84	struct cls_bpf_head *head = rcu_dereference_bh(tp->root);
 85	bool at_ingress = skb_at_tc_ingress(skb);
 86	struct cls_bpf_prog *prog;
 87	int ret = -1;
 88
 89	/* Needed here for accessing maps. */
 90	rcu_read_lock();
 91	list_for_each_entry_rcu(prog, &head->plist, link) {
 92		int filter_res;
 93
 94		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
 95
 96		if (tc_skip_sw(prog->gen_flags)) {
 97			filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
 98		} else if (at_ingress) {
 99			/* It is safe to push/pull even if skb_shared() */
100			__skb_push(skb, skb->mac_len);
101			bpf_compute_data_end(skb);
102			filter_res = BPF_PROG_RUN(prog->filter, skb);
103			__skb_pull(skb, skb->mac_len);
104		} else {
105			bpf_compute_data_end(skb);
106			filter_res = BPF_PROG_RUN(prog->filter, skb);
107		}
108
109		if (prog->exts_integrated) {
110			res->class   = 0;
111			res->classid = TC_H_MAJ(prog->res.classid) |
112				       qdisc_skb_cb(skb)->tc_classid;
113
114			ret = cls_bpf_exec_opcode(filter_res);
115			if (ret == TC_ACT_UNSPEC)
116				continue;
117			break;
118		}
119
120		if (filter_res == 0)
121			continue;
122		if (filter_res != -1) {
123			res->class   = 0;
124			res->classid = filter_res;
125		} else {
126			*res = prog->res;
127		}
128
129		ret = tcf_exts_exec(skb, &prog->exts, res);
130		if (ret < 0)
131			continue;
132
133		break;
134	}
135	rcu_read_unlock();
136
137	return ret;
138}
139
140static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
141{
142	return !prog->bpf_ops;
143}
144
145static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
146			       enum tc_clsbpf_command cmd)
147{
148	struct net_device *dev = tp->q->dev_queue->dev;
149	struct tc_cls_bpf_offload bpf_offload = {};
150	struct tc_to_netdev offload;
151
152	offload.type = TC_SETUP_CLSBPF;
153	offload.cls_bpf = &bpf_offload;
154
155	bpf_offload.command = cmd;
156	bpf_offload.exts = &prog->exts;
157	bpf_offload.prog = prog->filter;
158	bpf_offload.name = prog->bpf_name;
159	bpf_offload.exts_integrated = prog->exts_integrated;
160	bpf_offload.gen_flags = prog->gen_flags;
161
162	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
163					     tp->protocol, &offload);
164}
165
166static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
167			   struct cls_bpf_prog *oldprog)
168{
169	struct net_device *dev = tp->q->dev_queue->dev;
170	struct cls_bpf_prog *obj = prog;
171	enum tc_clsbpf_command cmd;
172	bool skip_sw;
173	int ret;
174
175	skip_sw = tc_skip_sw(prog->gen_flags) ||
176		(oldprog && tc_skip_sw(oldprog->gen_flags));
177
178	if (oldprog && oldprog->offloaded) {
179		if (tc_should_offload(dev, tp, prog->gen_flags)) {
180			cmd = TC_CLSBPF_REPLACE;
181		} else if (!tc_skip_sw(prog->gen_flags)) {
182			obj = oldprog;
183			cmd = TC_CLSBPF_DESTROY;
184		} else {
185			return -EINVAL;
186		}
187	} else {
188		if (!tc_should_offload(dev, tp, prog->gen_flags))
189			return skip_sw ? -EINVAL : 0;
190		cmd = TC_CLSBPF_ADD;
191	}
192
193	ret = cls_bpf_offload_cmd(tp, obj, cmd);
194	if (ret)
195		return skip_sw ? ret : 0;
196
197	obj->offloaded = true;
198	if (oldprog)
199		oldprog->offloaded = false;
200
201	return 0;
202}
203
204static void cls_bpf_stop_offload(struct tcf_proto *tp,
205				 struct cls_bpf_prog *prog)
206{
207	int err;
208
209	if (!prog->offloaded)
210		return;
211
212	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
213	if (err) {
214		pr_err("Stopping hardware offload failed: %d\n", err);
215		return;
216	}
217
218	prog->offloaded = false;
219}
220
221static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
222					 struct cls_bpf_prog *prog)
223{
224	if (!prog->offloaded)
225		return;
226
227	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
228}
229
230static int cls_bpf_init(struct tcf_proto *tp)
231{
232	struct cls_bpf_head *head;
233
234	head = kzalloc(sizeof(*head), GFP_KERNEL);
235	if (head == NULL)
236		return -ENOBUFS;
237
238	INIT_LIST_HEAD_RCU(&head->plist);
239	rcu_assign_pointer(tp->root, head);
240
241	return 0;
242}
243
244static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog)
245{
246	tcf_exts_destroy(&prog->exts);
247
248	if (cls_bpf_is_ebpf(prog))
249		bpf_prog_put(prog->filter);
250	else
251		bpf_prog_destroy(prog->filter);
252
253	kfree(prog->bpf_name);
254	kfree(prog->bpf_ops);
255	kfree(prog);
256}
257
258static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
259{
260	__cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu));
261}
262
263static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
264{
265	cls_bpf_stop_offload(tp, prog);
266	list_del_rcu(&prog->link);
267	tcf_unbind_filter(tp, &prog->res);
268	call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu);
269}
270
/*
 * .delete callback: @arg is the program pointer previously handed out by
 * cls_bpf_get(). Always returns 0.
 */
static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
{
	struct cls_bpf_prog *victim = (struct cls_bpf_prog *) arg;

	__cls_bpf_delete(tp, victim);

	return 0;
}
276
277static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
278{
279	struct cls_bpf_head *head = rtnl_dereference(tp->root);
280	struct cls_bpf_prog *prog, *tmp;
281
282	if (!force && !list_empty(&head->plist))
283		return false;
284
285	list_for_each_entry_safe(prog, tmp, &head->plist, link)
286		__cls_bpf_delete(tp, prog);
287
288	kfree_rcu(head, rcu);
289	return true;
290}
291
292static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
293{
294	struct cls_bpf_head *head = rtnl_dereference(tp->root);
295	struct cls_bpf_prog *prog;
296	unsigned long ret = 0UL;
297
298	list_for_each_entry(prog, &head->plist, link) {
299		if (prog->handle == handle) {
300			ret = (unsigned long) prog;
301			break;
302		}
303	}
304
305	return ret;
306}
307
308static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
309{
310	struct sock_filter *bpf_ops;
311	struct sock_fprog_kern fprog_tmp;
312	struct bpf_prog *fp;
313	u16 bpf_size, bpf_num_ops;
314	int ret;
315
316	bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
317	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
318		return -EINVAL;
319
320	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
321	if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
322		return -EINVAL;
323
324	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
325	if (bpf_ops == NULL)
326		return -ENOMEM;
327
328	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
329
330	fprog_tmp.len = bpf_num_ops;
331	fprog_tmp.filter = bpf_ops;
332
333	ret = bpf_prog_create(&fp, &fprog_tmp);
334	if (ret < 0) {
335		kfree(bpf_ops);
336		return ret;
337	}
338
339	prog->bpf_ops = bpf_ops;
340	prog->bpf_num_ops = bpf_num_ops;
341	prog->bpf_name = NULL;
342	prog->filter = fp;
343
344	return 0;
345}
346
347static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
348				 const struct tcf_proto *tp)
349{
350	struct bpf_prog *fp;
351	char *name = NULL;
352	u32 bpf_fd;
353
354	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
355
356	fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS);
357	if (IS_ERR(fp))
358		return PTR_ERR(fp);
359
360	if (tb[TCA_BPF_NAME]) {
361		name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
362		if (!name) {
363			bpf_prog_put(fp);
364			return -ENOMEM;
365		}
366	}
367
368	prog->bpf_ops = NULL;
369	prog->bpf_name = name;
370	prog->filter = fp;
371
372	if (fp->dst_needed && !(tp->q->flags & TCQ_F_INGRESS))
373		netif_keep_dst(qdisc_dev(tp->q));
374
375	return 0;
376}
377
378static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
379				   struct cls_bpf_prog *prog,
380				   unsigned long base, struct nlattr **tb,
381				   struct nlattr *est, bool ovr)
382{
383	bool is_bpf, is_ebpf, have_exts = false;
384	struct tcf_exts exts;
385	u32 gen_flags = 0;
386	int ret;
387
388	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
389	is_ebpf = tb[TCA_BPF_FD];
390	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
391		return -EINVAL;
392
393	ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
394	if (ret < 0)
395		return ret;
396	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
397	if (ret < 0)
398		goto errout;
399
400	if (tb[TCA_BPF_FLAGS]) {
401		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
402
403		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
404			ret = -EINVAL;
405			goto errout;
406		}
407
408		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
409	}
410	if (tb[TCA_BPF_FLAGS_GEN]) {
411		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
412		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
413		    !tc_flags_valid(gen_flags)) {
414			ret = -EINVAL;
415			goto errout;
416		}
417	}
418
419	prog->exts_integrated = have_exts;
420	prog->gen_flags = gen_flags;
421
422	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
423		       cls_bpf_prog_from_efd(tb, prog, tp);
424	if (ret < 0)
425		goto errout;
426
427	if (tb[TCA_BPF_CLASSID]) {
428		prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
429		tcf_bind_filter(tp, &prog->res, base);
430	}
431
432	tcf_exts_change(tp, &prog->exts, &exts);
433	return 0;
434
435errout:
436	tcf_exts_destroy(&exts);
437	return ret;
438}
439
440static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
441				   struct cls_bpf_head *head)
442{
443	unsigned int i = 0x80000000;
444	u32 handle;
445
446	do {
447		if (++head->hgen == 0x7FFFFFFF)
448			head->hgen = 1;
449	} while (--i > 0 && cls_bpf_get(tp, head->hgen));
450
451	if (unlikely(i == 0)) {
452		pr_err("Insufficient number of handles\n");
453		handle = 0;
454	} else {
455		handle = head->hgen;
456	}
457
458	return handle;
459}
460
461static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
462			  struct tcf_proto *tp, unsigned long base,
463			  u32 handle, struct nlattr **tca,
464			  unsigned long *arg, bool ovr)
465{
466	struct cls_bpf_head *head = rtnl_dereference(tp->root);
467	struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg;
468	struct nlattr *tb[TCA_BPF_MAX + 1];
469	struct cls_bpf_prog *prog;
470	int ret;
471
472	if (tca[TCA_OPTIONS] == NULL)
473		return -EINVAL;
474
475	ret = nla_parse_nested(tb, TCA_BPF_MAX, tca[TCA_OPTIONS], bpf_policy);
476	if (ret < 0)
477		return ret;
478
479	prog = kzalloc(sizeof(*prog), GFP_KERNEL);
480	if (!prog)
481		return -ENOBUFS;
482
483	ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
484	if (ret < 0)
485		goto errout;
486
487	if (oldprog) {
488		if (handle && oldprog->handle != handle) {
489			ret = -EINVAL;
490			goto errout;
491		}
492	}
493
494	if (handle == 0)
495		prog->handle = cls_bpf_grab_new_handle(tp, head);
496	else
497		prog->handle = handle;
498	if (prog->handle == 0) {
499		ret = -EINVAL;
500		goto errout;
501	}
502
503	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
504				      ovr);
505	if (ret < 0)
506		goto errout;
507
508	ret = cls_bpf_offload(tp, prog, oldprog);
509	if (ret) {
510		__cls_bpf_delete_prog(prog);
511		return ret;
512	}
513
514	if (oldprog) {
515		list_replace_rcu(&oldprog->link, &prog->link);
516		tcf_unbind_filter(tp, &oldprog->res);
517		call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
518	} else {
519		list_add_rcu(&prog->link, &head->plist);
520	}
521
522	*arg = (unsigned long) prog;
523	return 0;
524
525errout:
526	tcf_exts_destroy(&prog->exts);
527	kfree(prog);
528	return ret;
529}
530
531static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
532				 struct sk_buff *skb)
533{
534	struct nlattr *nla;
535
536	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
537		return -EMSGSIZE;
538
539	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
540			  sizeof(struct sock_filter));
541	if (nla == NULL)
542		return -EMSGSIZE;
543
544	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
545
546	return 0;
547}
548
549static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
550				  struct sk_buff *skb)
551{
552	struct nlattr *nla;
553
554	if (prog->bpf_name &&
555	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
556		return -EMSGSIZE;
557
558	nla = nla_reserve(skb, TCA_BPF_TAG, sizeof(prog->filter->tag));
559	if (nla == NULL)
560		return -EMSGSIZE;
561
562	memcpy(nla_data(nla), prog->filter->tag, nla_len(nla));
563
564	return 0;
565}
566
567static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
568			struct sk_buff *skb, struct tcmsg *tm)
569{
570	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
571	struct nlattr *nest;
572	u32 bpf_flags = 0;
573	int ret;
574
575	if (prog == NULL)
576		return skb->len;
577
578	tm->tcm_handle = prog->handle;
579
580	cls_bpf_offload_update_stats(tp, prog);
581
582	nest = nla_nest_start(skb, TCA_OPTIONS);
583	if (nest == NULL)
584		goto nla_put_failure;
585
586	if (prog->res.classid &&
587	    nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
588		goto nla_put_failure;
589
590	if (cls_bpf_is_ebpf(prog))
591		ret = cls_bpf_dump_ebpf_info(prog, skb);
592	else
593		ret = cls_bpf_dump_bpf_info(prog, skb);
594	if (ret)
595		goto nla_put_failure;
596
597	if (tcf_exts_dump(skb, &prog->exts) < 0)
598		goto nla_put_failure;
599
600	if (prog->exts_integrated)
601		bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
602	if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
603		goto nla_put_failure;
604	if (prog->gen_flags &&
605	    nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
606		goto nla_put_failure;
607
608	nla_nest_end(skb, nest);
609
610	if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
611		goto nla_put_failure;
612
613	return skb->len;
614
615nla_put_failure:
616	nla_nest_cancel(skb, nest);
617	return -1;
618}
619
620static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
621{
622	struct cls_bpf_head *head = rtnl_dereference(tp->root);
623	struct cls_bpf_prog *prog;
624
625	list_for_each_entry(prog, &head->plist, link) {
626		if (arg->count < arg->skip)
627			goto skip;
628		if (arg->fn(tp, (unsigned long) prog, arg) < 0) {
629			arg->stop = 1;
630			break;
631		}
632skip:
633		arg->count++;
634	}
635}
636
637static struct tcf_proto_ops cls_bpf_ops __read_mostly = {
638	.kind		=	"bpf",
639	.owner		=	THIS_MODULE,
640	.classify	=	cls_bpf_classify,
641	.init		=	cls_bpf_init,
642	.destroy	=	cls_bpf_destroy,
643	.get		=	cls_bpf_get,
644	.change		=	cls_bpf_change,
645	.delete		=	cls_bpf_delete,
646	.walk		=	cls_bpf_walk,
647	.dump		=	cls_bpf_dump,
648};
649
650static int __init cls_bpf_init_mod(void)
651{
652	return register_tcf_proto_ops(&cls_bpf_ops);
653}
654
655static void __exit cls_bpf_exit_mod(void)
656{
657	unregister_tcf_proto_ops(&cls_bpf_ops);
658}
659
660module_init(cls_bpf_init_mod);
661module_exit(cls_bpf_exit_mod);