sch_tbf.c - net/sched/sch_tbf.c - Linux diff v4.17 - Bootlin Elixir Cross Referencer

  1/*
  2 * net/sched/sch_tbf.c	Token Bucket Filter queue.
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 11 *						 original idea by Martin Devera
 12 *
 13 */
 14
 15#include <linux/module.h>
 16#include <linux/types.h>
 17#include <linux/kernel.h>
 18#include <linux/string.h>
 19#include <linux/errno.h>
 20#include <linux/skbuff.h>
 21#include <net/netlink.h>
 22#include <net/sch_generic.h>
 23#include <net/pkt_sched.h>
 24
 25
 26/*	Simple Token Bucket Filter.
 27	=======================================
 28
 29	SOURCE.
 30	-------
 31
 32	None.
 33
 34	Description.
 35	------------
 36
 37	A data flow obeys TBF with rate R and depth B, if for any
 38	time interval t_i...t_f the number of transmitted bits
 39	does not exceed B + R*(t_f-t_i).
 40
 41	Packetized version of this definition:
 42	The sequence of packets of sizes s_i served at moments t_i
 43	obeys TBF, if for any i<=k:
 44
 45	s_i+....+s_k <= B + R*(t_k - t_i)
 46
 47	Algorithm.
 48	----------
 49
 50	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
 51
 52	N(t+delta) = min{B/R, N(t) + delta}
 53
 54	If the first packet in queue has length S, it may be
 55	transmitted only at the time t_* when S/R <= N(t_*),
 56	and in this case N(t) jumps:
 57
 58	N(t_* + 0) = N(t_* - 0) - S/R.
 59
 60
 61
 62	Actually, QoS requires two TBF to be applied to a data stream.
 63	One of them controls steady state burst size, another
 64	one with rate P (peak rate) and depth M (equal to link MTU)
 65	limits bursts at a smaller time scale.
 66
 67	It is easy to see that P>R, and B>M. If P is infinity, this double
 68	TBF is equivalent to a single one.
 69
 70	When TBF works in reshaping mode, latency is estimated as:
 71
 72	lat = max ((L-B)/R, (L-M)/P)
 73
 74
 75	NOTES.
 76	------
 77
 78	If TBF throttles, it starts a watchdog timer, which will wake it up
 79	when it is ready to transmit.
 80	Note that the minimal timer resolution is 1/HZ.
 81	If no new packets arrive during this period,
 82	or if the device is not awaken by EOI for some previous packet,
 83	TBF can stop its activity for 1/HZ.
 84
 85
 86	This means, that with depth B, the maximal rate is
 87
 88	R_crit = B*HZ
 89
 90	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
 91
 92	Note that the peak rate TBF is much more tough: with MTU 1500
 93	P_crit = 150Kbytes/sec. So, if you need greater peak
 94	rates, use alpha with HZ=1000 :-)
 95
 96	With classful TBF, limit is just kept for backwards compatibility.
 97	It is passed to the default bfifo qdisc - if the inner qdisc is
 98	changed the limit is not effective anymore.
 99*/
100
101struct tbf_sched_data {
102/* Parameters */
103	u32		limit;		/* Maximal length of backlog: bytes */
 
 
104	u32		max_size;
105	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
106	s64		mtu;
107	struct psched_ratecfg rate;
108	struct psched_ratecfg peak;
109
110/* Variables */
111	s64	tokens;			/* Current number of B tokens */
112	s64	ptokens;		/* Current number of P tokens */
113	s64	t_c;			/* Time check-point */
114	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
115	struct qdisc_watchdog watchdog;	/* Watchdog timer */
116};
117
 
 
118
119/* Time to Length, convert time in ns to length in bytes
120 * to determinate how many bytes can be sent in given time.
121 */
122static u64 psched_ns_t2l(const struct psched_ratecfg *r,
123			 u64 time_in_ns)
124{
125	/* The formula is :
126	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
127	 */
128	u64 len = time_in_ns * r->rate_bytes_ps;
129
130	do_div(len, NSEC_PER_SEC);
131
132	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
133		do_div(len, 53);
134		len = len * 48;
135	}
136
137	if (len > r->overhead)
138		len -= r->overhead;
139	else
140		len = 0;
141
142	return len;
143}
144
145/* GSO packet is too big, segment it so that tbf can transmit
146 * each segment in time
147 */
148static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
149		       struct sk_buff **to_free)
150{
151	struct tbf_sched_data *q = qdisc_priv(sch);
152	struct sk_buff *segs, *nskb;
153	netdev_features_t features = netif_skb_features(skb);
154	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
155	int ret, nb;
156
157	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
158
159	if (IS_ERR_OR_NULL(segs))
160		return qdisc_drop(skb, sch, to_free);
161
162	nb = 0;
163	while (segs) {
164		nskb = segs->next;
165		segs->next = NULL;
166		qdisc_skb_cb(segs)->pkt_len = segs->len;
167		len += segs->len;
168		ret = qdisc_enqueue(segs, q->qdisc, to_free);
169		if (ret != NET_XMIT_SUCCESS) {
170			if (net_xmit_drop_count(ret))
171				qdisc_qstats_drop(sch);
172		} else {
173			nb++;
174		}
175		segs = nskb;
176	}
177	sch->q.qlen += nb;
178	if (nb > 1)
179		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
180	consume_skb(skb);
181	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
182}
183
184static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
185		       struct sk_buff **to_free)
186{
187	struct tbf_sched_data *q = qdisc_priv(sch);
188	int ret;
189
190	if (qdisc_pkt_len(skb) > q->max_size) {
191		if (skb_is_gso(skb) &&
192		    skb_gso_validate_mac_len(skb, q->max_size))
193			return tbf_segment(skb, sch, to_free);
194		return qdisc_drop(skb, sch, to_free);
195	}
196	ret = qdisc_enqueue(skb, q->qdisc, to_free);
197	if (ret != NET_XMIT_SUCCESS) {
198		if (net_xmit_drop_count(ret))
199			qdisc_qstats_drop(sch);
200		return ret;
201	}
202
203	qdisc_qstats_backlog_inc(sch, skb);
204	sch->q.qlen++;
205	return NET_XMIT_SUCCESS;
206}
207
208static bool tbf_peak_present(const struct tbf_sched_data *q)
209{
210	return q->peak.rate_bytes_ps;
 
 
 
 
 
 
 
211}
212
213static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
214{
215	struct tbf_sched_data *q = qdisc_priv(sch);
216	struct sk_buff *skb;
217
218	skb = q->qdisc->ops->peek(q->qdisc);
219
220	if (skb) {
221		s64 now;
222		s64 toks;
223		s64 ptoks = 0;
224		unsigned int len = qdisc_pkt_len(skb);
225
226		now = ktime_get_ns();
227		toks = min_t(s64, now - q->t_c, q->buffer);
228
229		if (tbf_peak_present(q)) {
230			ptoks = toks + q->ptokens;
231			if (ptoks > q->mtu)
232				ptoks = q->mtu;
233			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
234		}
235		toks += q->tokens;
236		if (toks > q->buffer)
237			toks = q->buffer;
238		toks -= (s64) psched_l2t_ns(&q->rate, len);
239
240		if ((toks|ptoks) >= 0) {
241			skb = qdisc_dequeue_peeked(q->qdisc);
242			if (unlikely(!skb))
243				return NULL;
244
245			q->t_c = now;
246			q->tokens = toks;
247			q->ptokens = ptoks;
248			qdisc_qstats_backlog_dec(sch, skb);
249			sch->q.qlen--;
 
250			qdisc_bstats_update(sch, skb);
251			return skb;
252		}
253
254		qdisc_watchdog_schedule_ns(&q->watchdog,
255					   now + max_t(long, -toks, -ptoks));
256
257		/* Maybe we have a shorter packet in the queue,
258		   which can be sent now. It sounds cool,
259		   but, however, this is wrong in principle.
260		   We MUST NOT reorder packets under these circumstances.
261
262		   Really, if we split the flow into independent
263		   subflows, it would be a very good solution.
264		   This is the main idea of all FQ algorithms
265		   (cf. CSZ, HPFQ, HFSC)
266		 */
267
268		qdisc_qstats_overlimit(sch);
269	}
270	return NULL;
271}
272
273static void tbf_reset(struct Qdisc *sch)
274{
275	struct tbf_sched_data *q = qdisc_priv(sch);
276
277	qdisc_reset(q->qdisc);
278	sch->qstats.backlog = 0;
279	sch->q.qlen = 0;
280	q->t_c = ktime_get_ns();
281	q->tokens = q->buffer;
282	q->ptokens = q->mtu;
283	qdisc_watchdog_cancel(&q->watchdog);
284}
285
286static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
287	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
288	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
289	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
290	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
291	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
292	[TCA_TBF_BURST] = { .type = NLA_U32 },
293	[TCA_TBF_PBURST] = { .type = NLA_U32 },
294};
295
296static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
297		      struct netlink_ext_ack *extack)
298{
299	int err;
300	struct tbf_sched_data *q = qdisc_priv(sch);
301	struct nlattr *tb[TCA_TBF_MAX + 1];
302	struct tc_tbf_qopt *qopt;
 
 
303	struct Qdisc *child = NULL;
304	struct psched_ratecfg rate;
305	struct psched_ratecfg peak;
306	u64 max_size;
307	s64 buffer, mtu;
308	u64 rate64 = 0, prate64 = 0;
309
310	err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy, NULL);
311	if (err < 0)
312		return err;
313
314	err = -EINVAL;
315	if (tb[TCA_TBF_PARMS] == NULL)
316		goto done;
317
318	qopt = nla_data(tb[TCA_TBF_PARMS]);
319	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
320		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
321					      tb[TCA_TBF_RTAB],
322					      NULL));
323
324	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
325			qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
326						      tb[TCA_TBF_PTAB],
327						      NULL));
328
329	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
330	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
331
332	if (tb[TCA_TBF_RATE64])
333		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
334	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
335
336	if (tb[TCA_TBF_BURST]) {
337		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
338		buffer = psched_l2t_ns(&rate, max_size);
339	} else {
340		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
341	}
342
343	if (qopt->peakrate.rate) {
344		if (tb[TCA_TBF_PRATE64])
345			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
346		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
347		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
348			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
349					peak.rate_bytes_ps, rate.rate_bytes_ps);
350			err = -EINVAL;
351			goto done;
352		}
353
354		if (tb[TCA_TBF_PBURST]) {
355			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
356			max_size = min_t(u32, max_size, pburst);
357			mtu = psched_l2t_ns(&peak, pburst);
358		} else {
359			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
360		}
361	} else {
362		memset(&peak, 0, sizeof(peak));
363	}
364
365	if (max_size < psched_mtu(qdisc_dev(sch)))
366		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
367				    max_size, qdisc_dev(sch)->name,
368				    psched_mtu(qdisc_dev(sch)));
369
370	if (!max_size) {
371		err = -EINVAL;
372		goto done;
 
 
 
 
 
373	}
 
 
374
375	if (q->qdisc != &noop_qdisc) {
376		err = fifo_set_limit(q->qdisc, qopt->limit);
377		if (err)
378			goto done;
379	} else if (qopt->limit > 0) {
380		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
381					 extack);
382		if (IS_ERR(child)) {
383			err = PTR_ERR(child);
384			goto done;
385		}
386
387		/* child is fifo, no need to check for noop_qdisc */
388		qdisc_hash_add(child, true);
389	}
390
391	sch_tree_lock(sch);
392	if (child) {
393		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
394					  q->qdisc->qstats.backlog);
395		qdisc_destroy(q->qdisc);
396		q->qdisc = child;
397	}
398	q->limit = qopt->limit;
399	if (tb[TCA_TBF_PBURST])
400		q->mtu = mtu;
401	else
402		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
403	q->max_size = max_size;
404	if (tb[TCA_TBF_BURST])
405		q->buffer = buffer;
406	else
407		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
408	q->tokens = q->buffer;
409	q->ptokens = q->mtu;
410
411	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
412	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
413
414	sch_tree_unlock(sch);
415	err = 0;
416done:
 
 
 
 
417	return err;
418}
419
420static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
421		    struct netlink_ext_ack *extack)
422{
423	struct tbf_sched_data *q = qdisc_priv(sch);
424
425	qdisc_watchdog_init(&q->watchdog, sch);
426	q->qdisc = &noop_qdisc;
427
428	if (!opt)
429		return -EINVAL;
430
431	q->t_c = ktime_get_ns();
 
 
432
433	return tbf_change(sch, opt, extack);
434}
435
436static void tbf_destroy(struct Qdisc *sch)
437{
438	struct tbf_sched_data *q = qdisc_priv(sch);
439
440	qdisc_watchdog_cancel(&q->watchdog);
 
 
 
 
 
 
441	qdisc_destroy(q->qdisc);
442}
443
444static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
445{
446	struct tbf_sched_data *q = qdisc_priv(sch);
447	struct nlattr *nest;
448	struct tc_tbf_qopt opt;
449
450	sch->qstats.backlog = q->qdisc->qstats.backlog;
451	nest = nla_nest_start(skb, TCA_OPTIONS);
452	if (nest == NULL)
453		goto nla_put_failure;
454
455	opt.limit = q->limit;
456	psched_ratecfg_getrate(&opt.rate, &q->rate);
457	if (tbf_peak_present(q))
458		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
459	else
460		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
461	opt.mtu = PSCHED_NS2TICKS(q->mtu);
462	opt.buffer = PSCHED_NS2TICKS(q->buffer);
463	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
464		goto nla_put_failure;
465	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
466	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
467			      TCA_TBF_PAD))
468		goto nla_put_failure;
469	if (tbf_peak_present(q) &&
470	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
471	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
472			      TCA_TBF_PAD))
473		goto nla_put_failure;
474
475	return nla_nest_end(skb, nest);
 
476
477nla_put_failure:
478	nla_nest_cancel(skb, nest);
479	return -1;
480}
481
482static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
483			  struct sk_buff *skb, struct tcmsg *tcm)
484{
485	struct tbf_sched_data *q = qdisc_priv(sch);
486
487	tcm->tcm_handle |= TC_H_MIN(1);
488	tcm->tcm_info = q->qdisc->handle;
489
490	return 0;
491}
492
493static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
494		     struct Qdisc **old, struct netlink_ext_ack *extack)
495{
496	struct tbf_sched_data *q = qdisc_priv(sch);
497
498	if (new == NULL)
499		new = &noop_qdisc;
500
501	*old = qdisc_replace(sch, new, &q->qdisc);
 
 
 
 
 
 
502	return 0;
503}
504
505static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
506{
507	struct tbf_sched_data *q = qdisc_priv(sch);
508	return q->qdisc;
509}
510
511static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
512{
513	return 1;
514}
515
 
 
 
 
516static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
517{
518	if (!walker->stop) {
519		if (walker->count >= walker->skip)
520			if (walker->fn(sch, 1, walker) < 0) {
521				walker->stop = 1;
522				return;
523			}
524		walker->count++;
525	}
526}
527
528static const struct Qdisc_class_ops tbf_class_ops = {
529	.graft		=	tbf_graft,
530	.leaf		=	tbf_leaf,
531	.find		=	tbf_find,
 
532	.walk		=	tbf_walk,
533	.dump		=	tbf_dump_class,
534};
535
536static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
537	.next		=	NULL,
538	.cl_ops		=	&tbf_class_ops,
539	.id		=	"tbf",
540	.priv_size	=	sizeof(struct tbf_sched_data),
541	.enqueue	=	tbf_enqueue,
542	.dequeue	=	tbf_dequeue,
543	.peek		=	qdisc_peek_dequeued,
 
544	.init		=	tbf_init,
545	.reset		=	tbf_reset,
546	.destroy	=	tbf_destroy,
547	.change		=	tbf_change,
548	.dump		=	tbf_dump,
549	.owner		=	THIS_MODULE,
550};
551
552static int __init tbf_module_init(void)
553{
554	return register_qdisc(&tbf_qdisc_ops);
555}
556
557static void __exit tbf_module_exit(void)
558{
559	unregister_qdisc(&tbf_qdisc_ops);
560}
561module_init(tbf_module_init)
562module_exit(tbf_module_exit)
563MODULE_LICENSE("GPL");

  1/*
  2 * net/sched/sch_tbf.c	Token Bucket Filter queue.
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 11 *						 original idea by Martin Devera
 12 *
 13 */
 14
 15#include <linux/module.h>
 16#include <linux/types.h>
 17#include <linux/kernel.h>
 18#include <linux/string.h>
 19#include <linux/errno.h>
 20#include <linux/skbuff.h>
 21#include <net/netlink.h>
 
 22#include <net/pkt_sched.h>
 23
 24
 25/*	Simple Token Bucket Filter.
 26	=======================================
 27
 28	SOURCE.
 29	-------
 30
 31	None.
 32
 33	Description.
 34	------------
 35
 36	A data flow obeys TBF with rate R and depth B, if for any
 37	time interval t_i...t_f the number of transmitted bits
 38	does not exceed B + R*(t_f-t_i).
 39
 40	Packetized version of this definition:
 41	The sequence of packets of sizes s_i served at moments t_i
 42	obeys TBF, if for any i<=k:
 43
 44	s_i+....+s_k <= B + R*(t_k - t_i)
 45
 46	Algorithm.
 47	----------
 48
 49	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
 50
 51	N(t+delta) = min{B/R, N(t) + delta}
 52
 53	If the first packet in queue has length S, it may be
 54	transmitted only at the time t_* when S/R <= N(t_*),
 55	and in this case N(t) jumps:
 56
 57	N(t_* + 0) = N(t_* - 0) - S/R.
 58
 59
 60
 61	Actually, QoS requires two TBF to be applied to a data stream.
 62	One of them controls steady state burst size, another
 63	one with rate P (peak rate) and depth M (equal to link MTU)
 64	limits bursts at a smaller time scale.
 65
 66	It is easy to see that P>R, and B>M. If P is infinity, this double
 67	TBF is equivalent to a single one.
 68
 69	When TBF works in reshaping mode, latency is estimated as:
 70
 71	lat = max ((L-B)/R, (L-M)/P)
 72
 73
 74	NOTES.
 75	------
 76
 77	If TBF throttles, it starts a watchdog timer, which will wake it up
 78	when it is ready to transmit.
 79	Note that the minimal timer resolution is 1/HZ.
 80	If no new packets arrive during this period,
 81	or if the device is not awaken by EOI for some previous packet,
 82	TBF can stop its activity for 1/HZ.
 83
 84
 85	This means, that with depth B, the maximal rate is
 86
 87	R_crit = B*HZ
 88
 89	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
 90
 91	Note that the peak rate TBF is much more tough: with MTU 1500
 92	P_crit = 150Kbytes/sec. So, if you need greater peak
 93	rates, use alpha with HZ=1000 :-)
 94
 95	With classful TBF, limit is just kept for backwards compatibility.
 96	It is passed to the default bfifo qdisc - if the inner qdisc is
 97	changed the limit is not effective anymore.
 98*/
 99
100struct tbf_sched_data {
101/* Parameters */
102	u32		limit;		/* Maximal length of backlog: bytes */
103	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
104	u32		mtu;
105	u32		max_size;
106	struct qdisc_rate_table	*R_tab;
107	struct qdisc_rate_table	*P_tab;
 
 
108
109/* Variables */
110	long	tokens;			/* Current number of B tokens */
111	long	ptokens;		/* Current number of P tokens */
112	psched_time_t	t_c;		/* Time check-point */
113	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
114	struct qdisc_watchdog watchdog;	/* Watchdog timer */
115};
116
/* Length to time: look up the cost of an L-byte packet in the rate
 * table (L2T) or in the peak-rate table (L2T_P) of @q.
 */
#define L2T(q, L)	qdisc_l2t((q)->R_tab, L)
#define L2T_P(q, L)	qdisc_l2t((q)->P_tab, L)
119
120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121{
122	struct tbf_sched_data *q = qdisc_priv(sch);
123	int ret;
124
125	if (qdisc_pkt_len(skb) > q->max_size)
126		return qdisc_reshape_fail(skb, sch);
127
128	ret = qdisc_enqueue(skb, q->qdisc);
 
 
 
129	if (ret != NET_XMIT_SUCCESS) {
130		if (net_xmit_drop_count(ret))
131			sch->qstats.drops++;
132		return ret;
133	}
134
 
135	sch->q.qlen++;
136	return NET_XMIT_SUCCESS;
137}
138
139static unsigned int tbf_drop(struct Qdisc *sch)
140{
141	struct tbf_sched_data *q = qdisc_priv(sch);
142	unsigned int len = 0;
143
144	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
145		sch->q.qlen--;
146		sch->qstats.drops++;
147	}
148	return len;
149}
150
151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
152{
153	struct tbf_sched_data *q = qdisc_priv(sch);
154	struct sk_buff *skb;
155
156	skb = q->qdisc->ops->peek(q->qdisc);
157
158	if (skb) {
159		psched_time_t now;
160		long toks;
161		long ptoks = 0;
162		unsigned int len = qdisc_pkt_len(skb);
163
164		now = psched_get_time();
165		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
166
167		if (q->P_tab) {
168			ptoks = toks + q->ptokens;
169			if (ptoks > (long)q->mtu)
170				ptoks = q->mtu;
171			ptoks -= L2T_P(q, len);
172		}
173		toks += q->tokens;
174		if (toks > (long)q->buffer)
175			toks = q->buffer;
176		toks -= L2T(q, len);
177
178		if ((toks|ptoks) >= 0) {
179			skb = qdisc_dequeue_peeked(q->qdisc);
180			if (unlikely(!skb))
181				return NULL;
182
183			q->t_c = now;
184			q->tokens = toks;
185			q->ptokens = ptoks;
 
186			sch->q.qlen--;
187			qdisc_unthrottled(sch);
188			qdisc_bstats_update(sch, skb);
189			return skb;
190		}
191
192		qdisc_watchdog_schedule(&q->watchdog,
193					now + max_t(long, -toks, -ptoks));
194
195		/* Maybe we have a shorter packet in the queue,
196		   which can be sent now. It sounds cool,
197		   but, however, this is wrong in principle.
198		   We MUST NOT reorder packets under these circumstances.
199
200		   Really, if we split the flow into independent
201		   subflows, it would be a very good solution.
202		   This is the main idea of all FQ algorithms
203		   (cf. CSZ, HPFQ, HFSC)
204		 */
205
206		sch->qstats.overlimits++;
207	}
208	return NULL;
209}
210
211static void tbf_reset(struct Qdisc *sch)
212{
213	struct tbf_sched_data *q = qdisc_priv(sch);
214
215	qdisc_reset(q->qdisc);
 
216	sch->q.qlen = 0;
217	q->t_c = psched_get_time();
218	q->tokens = q->buffer;
219	q->ptokens = q->mtu;
220	qdisc_watchdog_cancel(&q->watchdog);
221}
222
223static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
224	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
225	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
226	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 
 
 
 
227};
228
229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 
230{
231	int err;
232	struct tbf_sched_data *q = qdisc_priv(sch);
233	struct nlattr *tb[TCA_TBF_PTAB + 1];
234	struct tc_tbf_qopt *qopt;
235	struct qdisc_rate_table *rtab = NULL;
236	struct qdisc_rate_table *ptab = NULL;
237	struct Qdisc *child = NULL;
238	int max_size, n;
 
 
 
 
239
240	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
241	if (err < 0)
242		return err;
243
244	err = -EINVAL;
245	if (tb[TCA_TBF_PARMS] == NULL)
246		goto done;
247
248	qopt = nla_data(tb[TCA_TBF_PARMS]);
249	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
250	if (rtab == NULL)
251		goto done;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
253	if (qopt->peakrate.rate) {
254		if (qopt->peakrate.rate > qopt->rate.rate)
255			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
256		if (ptab == NULL)
 
 
 
 
257			goto done;
 
 
 
 
 
 
 
 
 
 
 
258	}
259
260	for (n = 0; n < 256; n++)
261		if (rtab->data[n] > qopt->buffer)
262			break;
263	max_size = (n << qopt->rate.cell_log) - 1;
264	if (ptab) {
265		int size;
266
267		for (n = 0; n < 256; n++)
268			if (ptab->data[n] > qopt->mtu)
269				break;
270		size = (n << qopt->peakrate.cell_log) - 1;
271		if (size < max_size)
272			max_size = size;
273	}
274	if (max_size < 0)
275		goto done;
276
277	if (q->qdisc != &noop_qdisc) {
278		err = fifo_set_limit(q->qdisc, qopt->limit);
279		if (err)
280			goto done;
281	} else if (qopt->limit > 0) {
282		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
 
283		if (IS_ERR(child)) {
284			err = PTR_ERR(child);
285			goto done;
286		}
 
 
 
287	}
288
289	sch_tree_lock(sch);
290	if (child) {
291		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
 
292		qdisc_destroy(q->qdisc);
293		q->qdisc = child;
294	}
295	q->limit = qopt->limit;
296	q->mtu = qopt->mtu;
 
 
 
297	q->max_size = max_size;
298	q->buffer = qopt->buffer;
 
 
 
299	q->tokens = q->buffer;
300	q->ptokens = q->mtu;
301
302	swap(q->R_tab, rtab);
303	swap(q->P_tab, ptab);
304
305	sch_tree_unlock(sch);
306	err = 0;
307done:
308	if (rtab)
309		qdisc_put_rtab(rtab);
310	if (ptab)
311		qdisc_put_rtab(ptab);
312	return err;
313}
314
315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 
316{
317	struct tbf_sched_data *q = qdisc_priv(sch);
318
319	if (opt == NULL)
 
 
 
320		return -EINVAL;
321
322	q->t_c = psched_get_time();
323	qdisc_watchdog_init(&q->watchdog, sch);
324	q->qdisc = &noop_qdisc;
325
326	return tbf_change(sch, opt);
327}
328
329static void tbf_destroy(struct Qdisc *sch)
330{
331	struct tbf_sched_data *q = qdisc_priv(sch);
332
333	qdisc_watchdog_cancel(&q->watchdog);
334
335	if (q->P_tab)
336		qdisc_put_rtab(q->P_tab);
337	if (q->R_tab)
338		qdisc_put_rtab(q->R_tab);
339
340	qdisc_destroy(q->qdisc);
341}
342
343static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
344{
345	struct tbf_sched_data *q = qdisc_priv(sch);
346	struct nlattr *nest;
347	struct tc_tbf_qopt opt;
348
 
349	nest = nla_nest_start(skb, TCA_OPTIONS);
350	if (nest == NULL)
351		goto nla_put_failure;
352
353	opt.limit = q->limit;
354	opt.rate = q->R_tab->rate;
355	if (q->P_tab)
356		opt.peakrate = q->P_tab->rate;
357	else
358		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
359	opt.mtu = q->mtu;
360	opt.buffer = q->buffer;
361	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
 
 
 
 
 
 
 
 
 
 
362
363	nla_nest_end(skb, nest);
364	return skb->len;
365
366nla_put_failure:
367	nla_nest_cancel(skb, nest);
368	return -1;
369}
370
371static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
372			  struct sk_buff *skb, struct tcmsg *tcm)
373{
374	struct tbf_sched_data *q = qdisc_priv(sch);
375
376	tcm->tcm_handle |= TC_H_MIN(1);
377	tcm->tcm_info = q->qdisc->handle;
378
379	return 0;
380}
381
382static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
383		     struct Qdisc **old)
384{
385	struct tbf_sched_data *q = qdisc_priv(sch);
386
387	if (new == NULL)
388		new = &noop_qdisc;
389
390	sch_tree_lock(sch);
391	*old = q->qdisc;
392	q->qdisc = new;
393	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
394	qdisc_reset(*old);
395	sch_tree_unlock(sch);
396
397	return 0;
398}
399
400static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
401{
402	struct tbf_sched_data *q = qdisc_priv(sch);
403	return q->qdisc;
404}
405
406static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
407{
408	return 1;
409}
410
/* Counterpart of tbf_get(); intentionally does nothing. */
static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}
414
415static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
416{
417	if (!walker->stop) {
418		if (walker->count >= walker->skip)
419			if (walker->fn(sch, 1, walker) < 0) {
420				walker->stop = 1;
421				return;
422			}
423		walker->count++;
424	}
425}
426
427static const struct Qdisc_class_ops tbf_class_ops = {
428	.graft		=	tbf_graft,
429	.leaf		=	tbf_leaf,
430	.get		=	tbf_get,
431	.put		=	tbf_put,
432	.walk		=	tbf_walk,
433	.dump		=	tbf_dump_class,
434};
435
436static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
437	.next		=	NULL,
438	.cl_ops		=	&tbf_class_ops,
439	.id		=	"tbf",
440	.priv_size	=	sizeof(struct tbf_sched_data),
441	.enqueue	=	tbf_enqueue,
442	.dequeue	=	tbf_dequeue,
443	.peek		=	qdisc_peek_dequeued,
444	.drop		=	tbf_drop,
445	.init		=	tbf_init,
446	.reset		=	tbf_reset,
447	.destroy	=	tbf_destroy,
448	.change		=	tbf_change,
449	.dump		=	tbf_dump,
450	.owner		=	THIS_MODULE,
451};
452
453static int __init tbf_module_init(void)
454{
455	return register_qdisc(&tbf_qdisc_ops);
456}
457
458static void __exit tbf_module_exit(void)
459{
460	unregister_qdisc(&tbf_qdisc_ops);
461}
462module_init(tbf_module_init)
463module_exit(tbf_module_exit)
464MODULE_LICENSE("GPL");