Linux Audio

Check our new training course

Linux kernel drivers training

Mar 31-Apr 9, 2025, special US time zones
Register
Loading...
v3.5.6
 
  1/*
  2 * net/sched/sch_tbf.c	Token Bucket Filter queue.
  3 *
  4 *		This program is free software; you can redistribute it and/or
  5 *		modify it under the terms of the GNU General Public License
  6 *		as published by the Free Software Foundation; either version
  7 *		2 of the License, or (at your option) any later version.
  8 *
  9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 11 *						 original idea by Martin Devera
 12 *
 13 */
 14
 15#include <linux/module.h>
 16#include <linux/types.h>
 17#include <linux/kernel.h>
 18#include <linux/string.h>
 19#include <linux/errno.h>
 20#include <linux/skbuff.h>
 21#include <net/netlink.h>
 
 22#include <net/pkt_sched.h>
 23
 24
 25/*	Simple Token Bucket Filter.
 26	=======================================
 27
 28	SOURCE.
 29	-------
 30
 31	None.
 32
 33	Description.
 34	------------
 35
 36	A data flow obeys TBF with rate R and depth B, if for any
 37	time interval t_i...t_f the number of transmitted bits
 38	does not exceed B + R*(t_f-t_i).
 39
 40	Packetized version of this definition:
 41	The sequence of packets of sizes s_i served at moments t_i
 42	obeys TBF, if for any i<=k:
 43
 44	s_i+....+s_k <= B + R*(t_k - t_i)
 45
 46	Algorithm.
 47	----------
 48
 49	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
 50
 51	N(t+delta) = min{B/R, N(t) + delta}
 52
 53	If the first packet in queue has length S, it may be
 54	transmitted only at the time t_* when S/R <= N(t_*),
 55	and in this case N(t) jumps:
 56
 57	N(t_* + 0) = N(t_* - 0) - S/R.
 58
 59
 60
 61	Actually, QoS requires two TBF to be applied to a data stream.
 62	One of them controls steady state burst size, another
 63	one with rate P (peak rate) and depth M (equal to link MTU)
 64	limits bursts at a smaller time scale.
 65
 66	It is easy to see that P>R, and B>M. If P is infinity, this double
 67	TBF is equivalent to a single one.
 68
 69	When TBF works in reshaping mode, latency is estimated as:
 70
 71	lat = max ((L-B)/R, (L-M)/P)
 72
 73
 74	NOTES.
 75	------
 76
 77	If TBF throttles, it starts a watchdog timer, which will wake it up
 78	when it is ready to transmit.
 79	Note that the minimal timer resolution is 1/HZ.
 80	If no new packets arrive during this period,
 81	or if the device is not awakened by EOI for some previous packet,
 82	TBF can stop its activity for 1/HZ.
 83
 84
 85	This means, that with depth B, the maximal rate is
 86
 87	R_crit = B*HZ
 88
 89	E.g. for 10 Mbit Ethernet and HZ=100, the minimal allowed B is ~10 Kbytes.
 90
 91	Note that the peak rate TBF is much tougher: with MTU 1500
 92	P_crit = 150Kbytes/sec. So, if you need greater peak
 93	rates, use alpha with HZ=1000 :-)
 94
 95	With classful TBF, limit is just kept for backwards compatibility.
 96	It is passed to the default bfifo qdisc - if the inner qdisc is
 97	changed the limit is not effective anymore.
 98*/
 99
100struct tbf_sched_data {
101/* Parameters */
102	u32		limit;		/* Maximal length of backlog: bytes */
103	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
104	u32		mtu;
105	u32		max_size;
106	struct qdisc_rate_table	*R_tab;
107	struct qdisc_rate_table	*P_tab;
 
 
108
109/* Variables */
110	long	tokens;			/* Current number of B tokens */
111	long	ptokens;		/* Current number of P tokens */
112	psched_time_t	t_c;		/* Time check-point */
113	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
114	struct qdisc_watchdog watchdog;	/* Watchdog timer */
115};
116
/* Look up length L in the rate table (L2T) or peak-rate table (L2T_P) of q. */
#define L2T(q, L)	qdisc_l2t((q)->R_tab, (L))
#define L2T_P(q, L)	qdisc_l2t((q)->P_tab, (L))
119
120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121{
122	struct tbf_sched_data *q = qdisc_priv(sch);
123	int ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
125	if (qdisc_pkt_len(skb) > q->max_size)
126		return qdisc_reshape_fail(skb, sch);
 
 
 
 
127
128	ret = qdisc_enqueue(skb, q->qdisc);
 
 
 
 
 
 
129	if (ret != NET_XMIT_SUCCESS) {
130		if (net_xmit_drop_count(ret))
131			sch->qstats.drops++;
132		return ret;
133	}
134
 
135	sch->q.qlen++;
136	return NET_XMIT_SUCCESS;
137}
138
139static unsigned int tbf_drop(struct Qdisc *sch)
140{
141	struct tbf_sched_data *q = qdisc_priv(sch);
142	unsigned int len = 0;
143
144	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
145		sch->q.qlen--;
146		sch->qstats.drops++;
147	}
148	return len;
149}
150
151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
152{
153	struct tbf_sched_data *q = qdisc_priv(sch);
154	struct sk_buff *skb;
155
156	skb = q->qdisc->ops->peek(q->qdisc);
157
158	if (skb) {
159		psched_time_t now;
160		long toks;
161		long ptoks = 0;
162		unsigned int len = qdisc_pkt_len(skb);
163
164		now = psched_get_time();
165		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
166
167		if (q->P_tab) {
168			ptoks = toks + q->ptokens;
169			if (ptoks > (long)q->mtu)
170				ptoks = q->mtu;
171			ptoks -= L2T_P(q, len);
172		}
173		toks += q->tokens;
174		if (toks > (long)q->buffer)
175			toks = q->buffer;
176		toks -= L2T(q, len);
177
178		if ((toks|ptoks) >= 0) {
179			skb = qdisc_dequeue_peeked(q->qdisc);
180			if (unlikely(!skb))
181				return NULL;
182
183			q->t_c = now;
184			q->tokens = toks;
185			q->ptokens = ptoks;
 
186			sch->q.qlen--;
187			qdisc_unthrottled(sch);
188			qdisc_bstats_update(sch, skb);
189			return skb;
190		}
191
192		qdisc_watchdog_schedule(&q->watchdog,
193					now + max_t(long, -toks, -ptoks));
194
195		/* Maybe we have a shorter packet in the queue,
196		   which can be sent now. It sounds cool,
197		   but, however, this is wrong in principle.
198		   We MUST NOT reorder packets under these circumstances.
199
200		   Really, if we split the flow into independent
201		   subflows, it would be a very good solution.
202		   This is the main idea of all FQ algorithms
203		   (cf. CSZ, HPFQ, HFSC)
204		 */
205
206		sch->qstats.overlimits++;
207	}
208	return NULL;
209}
210
211static void tbf_reset(struct Qdisc *sch)
212{
213	struct tbf_sched_data *q = qdisc_priv(sch);
214
215	qdisc_reset(q->qdisc);
 
216	sch->q.qlen = 0;
217	q->t_c = psched_get_time();
218	q->tokens = q->buffer;
219	q->ptokens = q->mtu;
220	qdisc_watchdog_cancel(&q->watchdog);
221}
222
223static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
224	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
225	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
226	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 
 
 
 
227};
228
229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 
230{
231	int err;
232	struct tbf_sched_data *q = qdisc_priv(sch);
233	struct nlattr *tb[TCA_TBF_PTAB + 1];
234	struct tc_tbf_qopt *qopt;
235	struct qdisc_rate_table *rtab = NULL;
236	struct qdisc_rate_table *ptab = NULL;
237	struct Qdisc *child = NULL;
238	int max_size, n;
 
 
 
 
239
240	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
 
241	if (err < 0)
242		return err;
243
244	err = -EINVAL;
245	if (tb[TCA_TBF_PARMS] == NULL)
246		goto done;
247
248	qopt = nla_data(tb[TCA_TBF_PARMS]);
249	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
250	if (rtab == NULL)
251		goto done;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
253	if (qopt->peakrate.rate) {
254		if (qopt->peakrate.rate > qopt->rate.rate)
255			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
256		if (ptab == NULL)
 
 
 
 
257			goto done;
258	}
259
260	for (n = 0; n < 256; n++)
261		if (rtab->data[n] > qopt->buffer)
262			break;
263	max_size = (n << qopt->rate.cell_log) - 1;
264	if (ptab) {
265		int size;
266
267		for (n = 0; n < 256; n++)
268			if (ptab->data[n] > qopt->mtu)
269				break;
270		size = (n << qopt->peakrate.cell_log) - 1;
271		if (size < max_size)
272			max_size = size;
273	}
274	if (max_size < 0)
 
 
 
 
 
 
 
275		goto done;
 
276
277	if (q->qdisc != &noop_qdisc) {
278		err = fifo_set_limit(q->qdisc, qopt->limit);
279		if (err)
280			goto done;
281	} else if (qopt->limit > 0) {
282		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
 
283		if (IS_ERR(child)) {
284			err = PTR_ERR(child);
285			goto done;
286		}
 
 
 
287	}
288
289	sch_tree_lock(sch);
290	if (child) {
291		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
292		qdisc_destroy(q->qdisc);
293		q->qdisc = child;
294	}
295	q->limit = qopt->limit;
296	q->mtu = qopt->mtu;
 
 
 
297	q->max_size = max_size;
298	q->buffer = qopt->buffer;
 
 
 
299	q->tokens = q->buffer;
300	q->ptokens = q->mtu;
301
302	swap(q->R_tab, rtab);
303	swap(q->P_tab, ptab);
304
305	sch_tree_unlock(sch);
306	err = 0;
307done:
308	if (rtab)
309		qdisc_put_rtab(rtab);
310	if (ptab)
311		qdisc_put_rtab(ptab);
312	return err;
313}
314
315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
 
316{
317	struct tbf_sched_data *q = qdisc_priv(sch);
318
319	if (opt == NULL)
320		return -EINVAL;
321
322	q->t_c = psched_get_time();
323	qdisc_watchdog_init(&q->watchdog, sch);
324	q->qdisc = &noop_qdisc;
325
326	return tbf_change(sch, opt);
 
 
 
 
 
327}
328
329static void tbf_destroy(struct Qdisc *sch)
330{
331	struct tbf_sched_data *q = qdisc_priv(sch);
332
333	qdisc_watchdog_cancel(&q->watchdog);
334
335	if (q->P_tab)
336		qdisc_put_rtab(q->P_tab);
337	if (q->R_tab)
338		qdisc_put_rtab(q->R_tab);
339
340	qdisc_destroy(q->qdisc);
341}
342
343static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
344{
345	struct tbf_sched_data *q = qdisc_priv(sch);
346	struct nlattr *nest;
347	struct tc_tbf_qopt opt;
348
349	sch->qstats.backlog = q->qdisc->qstats.backlog;
350	nest = nla_nest_start(skb, TCA_OPTIONS);
351	if (nest == NULL)
352		goto nla_put_failure;
353
354	opt.limit = q->limit;
355	opt.rate = q->R_tab->rate;
356	if (q->P_tab)
357		opt.peakrate = q->P_tab->rate;
358	else
359		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
360	opt.mtu = q->mtu;
361	opt.buffer = q->buffer;
362	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
363		goto nla_put_failure;
 
 
 
 
 
 
 
 
 
364
365	nla_nest_end(skb, nest);
366	return skb->len;
367
368nla_put_failure:
369	nla_nest_cancel(skb, nest);
370	return -1;
371}
372
373static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
374			  struct sk_buff *skb, struct tcmsg *tcm)
375{
376	struct tbf_sched_data *q = qdisc_priv(sch);
377
378	tcm->tcm_handle |= TC_H_MIN(1);
379	tcm->tcm_info = q->qdisc->handle;
380
381	return 0;
382}
383
384static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
385		     struct Qdisc **old)
386{
387	struct tbf_sched_data *q = qdisc_priv(sch);
388
389	if (new == NULL)
390		new = &noop_qdisc;
391
392	sch_tree_lock(sch);
393	*old = q->qdisc;
394	q->qdisc = new;
395	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
396	qdisc_reset(*old);
397	sch_tree_unlock(sch);
398
399	return 0;
400}
401
402static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
403{
404	struct tbf_sched_data *q = qdisc_priv(sch);
405	return q->qdisc;
406}
407
408static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
409{
410	return 1;
411}
412
/* Counterpart of tbf_get(); there is nothing to release. */
static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
	/* intentionally empty */
}
416
417static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
418{
419	if (!walker->stop) {
420		if (walker->count >= walker->skip)
421			if (walker->fn(sch, 1, walker) < 0) {
422				walker->stop = 1;
423				return;
424			}
425		walker->count++;
426	}
427}
428
429static const struct Qdisc_class_ops tbf_class_ops = {
430	.graft		=	tbf_graft,
431	.leaf		=	tbf_leaf,
432	.get		=	tbf_get,
433	.put		=	tbf_put,
434	.walk		=	tbf_walk,
435	.dump		=	tbf_dump_class,
436};
437
438static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
439	.next		=	NULL,
440	.cl_ops		=	&tbf_class_ops,
441	.id		=	"tbf",
442	.priv_size	=	sizeof(struct tbf_sched_data),
443	.enqueue	=	tbf_enqueue,
444	.dequeue	=	tbf_dequeue,
445	.peek		=	qdisc_peek_dequeued,
446	.drop		=	tbf_drop,
447	.init		=	tbf_init,
448	.reset		=	tbf_reset,
449	.destroy	=	tbf_destroy,
450	.change		=	tbf_change,
451	.dump		=	tbf_dump,
452	.owner		=	THIS_MODULE,
453};
454
455static int __init tbf_module_init(void)
456{
457	return register_qdisc(&tbf_qdisc_ops);
458}
459
460static void __exit tbf_module_exit(void)
461{
462	unregister_qdisc(&tbf_qdisc_ops);
463}
464module_init(tbf_module_init)
465module_exit(tbf_module_exit)
466MODULE_LICENSE("GPL");
v5.4
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * net/sched/sch_tbf.c	Token Bucket Filter queue.
  4 *
 
 
 
 
 
  5 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  6 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
  7 *						 original idea by Martin Devera
 
  8 */
  9
 10#include <linux/module.h>
 11#include <linux/types.h>
 12#include <linux/kernel.h>
 13#include <linux/string.h>
 14#include <linux/errno.h>
 15#include <linux/skbuff.h>
 16#include <net/netlink.h>
 17#include <net/sch_generic.h>
 18#include <net/pkt_sched.h>
 19
 20
 21/*	Simple Token Bucket Filter.
 22	=======================================
 23
 24	SOURCE.
 25	-------
 26
 27	None.
 28
 29	Description.
 30	------------
 31
 32	A data flow obeys TBF with rate R and depth B, if for any
 33	time interval t_i...t_f the number of transmitted bits
 34	does not exceed B + R*(t_f-t_i).
 35
 36	Packetized version of this definition:
 37	The sequence of packets of sizes s_i served at moments t_i
 38	obeys TBF, if for any i<=k:
 39
 40	s_i+....+s_k <= B + R*(t_k - t_i)
 41
 42	Algorithm.
 43	----------
 44
 45	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
 46
 47	N(t+delta) = min{B/R, N(t) + delta}
 48
 49	If the first packet in queue has length S, it may be
 50	transmitted only at the time t_* when S/R <= N(t_*),
 51	and in this case N(t) jumps:
 52
 53	N(t_* + 0) = N(t_* - 0) - S/R.
 54
 55
 56
 57	Actually, QoS requires two TBF to be applied to a data stream.
 58	One of them controls steady state burst size, another
 59	one with rate P (peak rate) and depth M (equal to link MTU)
 60	limits bursts at a smaller time scale.
 61
 62	It is easy to see that P>R, and B>M. If P is infinity, this double
 63	TBF is equivalent to a single one.
 64
 65	When TBF works in reshaping mode, latency is estimated as:
 66
 67	lat = max ((L-B)/R, (L-M)/P)
 68
 69
 70	NOTES.
 71	------
 72
 73	If TBF throttles, it starts a watchdog timer, which will wake it up
 74	when it is ready to transmit.
 75	Note that the minimal timer resolution is 1/HZ.
 76	If no new packets arrive during this period,
 77	or if the device is not awakened by EOI for some previous packet,
 78	TBF can stop its activity for 1/HZ.
 79
 80
 81	This means, that with depth B, the maximal rate is
 82
 83	R_crit = B*HZ
 84
 85	E.g. for 10 Mbit Ethernet and HZ=100, the minimal allowed B is ~10 Kbytes.
 86
 87	Note that the peak rate TBF is much tougher: with MTU 1500
 88	P_crit = 150Kbytes/sec. So, if you need greater peak
 89	rates, use alpha with HZ=1000 :-)
 90
 91	With classful TBF, limit is just kept for backwards compatibility.
 92	It is passed to the default bfifo qdisc - if the inner qdisc is
 93	changed the limit is not effective anymore.
 94*/
 95
 96struct tbf_sched_data {
 97/* Parameters */
 98	u32		limit;		/* Maximal length of backlog: bytes */
 
 
 99	u32		max_size;
100	s64		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
101	s64		mtu;
102	struct psched_ratecfg rate;
103	struct psched_ratecfg peak;
104
105/* Variables */
106	s64	tokens;			/* Current number of B tokens */
107	s64	ptokens;		/* Current number of P tokens */
108	s64	t_c;			/* Time check-point */
109	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
110	struct qdisc_watchdog watchdog;	/* Watchdog timer */
111};
112
 
 
113
114/* Time to Length, convert time in ns to length in bytes
115 * to determinate how many bytes can be sent in given time.
116 */
117static u64 psched_ns_t2l(const struct psched_ratecfg *r,
118			 u64 time_in_ns)
119{
120	/* The formula is :
121	 * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC
122	 */
123	u64 len = time_in_ns * r->rate_bytes_ps;
124
125	do_div(len, NSEC_PER_SEC);
126
127	if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) {
128		do_div(len, 53);
129		len = len * 48;
130	}
131
132	if (len > r->overhead)
133		len -= r->overhead;
134	else
135		len = 0;
136
137	return len;
138}
139
140/* GSO packet is too big, segment it so that tbf can transmit
141 * each segment in time
142 */
143static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
144		       struct sk_buff **to_free)
145{
146	struct tbf_sched_data *q = qdisc_priv(sch);
147	struct sk_buff *segs, *nskb;
148	netdev_features_t features = netif_skb_features(skb);
149	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
150	int ret, nb;
151
152	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
153
154	if (IS_ERR_OR_NULL(segs))
155		return qdisc_drop(skb, sch, to_free);
156
157	nb = 0;
158	while (segs) {
159		nskb = segs->next;
160		skb_mark_not_on_list(segs);
161		qdisc_skb_cb(segs)->pkt_len = segs->len;
162		len += segs->len;
163		ret = qdisc_enqueue(segs, q->qdisc, to_free);
164		if (ret != NET_XMIT_SUCCESS) {
165			if (net_xmit_drop_count(ret))
166				qdisc_qstats_drop(sch);
167		} else {
168			nb++;
169		}
170		segs = nskb;
171	}
172	sch->q.qlen += nb;
173	if (nb > 1)
174		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
175	consume_skb(skb);
176	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
177}
178
179static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch,
180		       struct sk_buff **to_free)
181{
182	struct tbf_sched_data *q = qdisc_priv(sch);
183	unsigned int len = qdisc_pkt_len(skb);
184	int ret;
185
186	if (qdisc_pkt_len(skb) > q->max_size) {
187		if (skb_is_gso(skb) &&
188		    skb_gso_validate_mac_len(skb, q->max_size))
189			return tbf_segment(skb, sch, to_free);
190		return qdisc_drop(skb, sch, to_free);
191	}
192	ret = qdisc_enqueue(skb, q->qdisc, to_free);
193	if (ret != NET_XMIT_SUCCESS) {
194		if (net_xmit_drop_count(ret))
195			qdisc_qstats_drop(sch);
196		return ret;
197	}
198
199	sch->qstats.backlog += len;
200	sch->q.qlen++;
201	return NET_XMIT_SUCCESS;
202}
203
204static bool tbf_peak_present(const struct tbf_sched_data *q)
205{
206	return q->peak.rate_bytes_ps;
 
 
 
 
 
 
 
207}
208
209static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
210{
211	struct tbf_sched_data *q = qdisc_priv(sch);
212	struct sk_buff *skb;
213
214	skb = q->qdisc->ops->peek(q->qdisc);
215
216	if (skb) {
217		s64 now;
218		s64 toks;
219		s64 ptoks = 0;
220		unsigned int len = qdisc_pkt_len(skb);
221
222		now = ktime_get_ns();
223		toks = min_t(s64, now - q->t_c, q->buffer);
224
225		if (tbf_peak_present(q)) {
226			ptoks = toks + q->ptokens;
227			if (ptoks > q->mtu)
228				ptoks = q->mtu;
229			ptoks -= (s64) psched_l2t_ns(&q->peak, len);
230		}
231		toks += q->tokens;
232		if (toks > q->buffer)
233			toks = q->buffer;
234		toks -= (s64) psched_l2t_ns(&q->rate, len);
235
236		if ((toks|ptoks) >= 0) {
237			skb = qdisc_dequeue_peeked(q->qdisc);
238			if (unlikely(!skb))
239				return NULL;
240
241			q->t_c = now;
242			q->tokens = toks;
243			q->ptokens = ptoks;
244			qdisc_qstats_backlog_dec(sch, skb);
245			sch->q.qlen--;
 
246			qdisc_bstats_update(sch, skb);
247			return skb;
248		}
249
250		qdisc_watchdog_schedule_ns(&q->watchdog,
251					   now + max_t(long, -toks, -ptoks));
252
253		/* Maybe we have a shorter packet in the queue,
254		   which can be sent now. It sounds cool,
255		   but, however, this is wrong in principle.
256		   We MUST NOT reorder packets under these circumstances.
257
258		   Really, if we split the flow into independent
259		   subflows, it would be a very good solution.
260		   This is the main idea of all FQ algorithms
261		   (cf. CSZ, HPFQ, HFSC)
262		 */
263
264		qdisc_qstats_overlimit(sch);
265	}
266	return NULL;
267}
268
269static void tbf_reset(struct Qdisc *sch)
270{
271	struct tbf_sched_data *q = qdisc_priv(sch);
272
273	qdisc_reset(q->qdisc);
274	sch->qstats.backlog = 0;
275	sch->q.qlen = 0;
276	q->t_c = ktime_get_ns();
277	q->tokens = q->buffer;
278	q->ptokens = q->mtu;
279	qdisc_watchdog_cancel(&q->watchdog);
280}
281
282static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
283	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
284	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
285	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
286	[TCA_TBF_RATE64]	= { .type = NLA_U64 },
287	[TCA_TBF_PRATE64]	= { .type = NLA_U64 },
288	[TCA_TBF_BURST] = { .type = NLA_U32 },
289	[TCA_TBF_PBURST] = { .type = NLA_U32 },
290};
291
292static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
293		      struct netlink_ext_ack *extack)
294{
295	int err;
296	struct tbf_sched_data *q = qdisc_priv(sch);
297	struct nlattr *tb[TCA_TBF_MAX + 1];
298	struct tc_tbf_qopt *qopt;
 
 
299	struct Qdisc *child = NULL;
300	struct psched_ratecfg rate;
301	struct psched_ratecfg peak;
302	u64 max_size;
303	s64 buffer, mtu;
304	u64 rate64 = 0, prate64 = 0;
305
306	err = nla_parse_nested_deprecated(tb, TCA_TBF_MAX, opt, tbf_policy,
307					  NULL);
308	if (err < 0)
309		return err;
310
311	err = -EINVAL;
312	if (tb[TCA_TBF_PARMS] == NULL)
313		goto done;
314
315	qopt = nla_data(tb[TCA_TBF_PARMS]);
316	if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
317		qdisc_put_rtab(qdisc_get_rtab(&qopt->rate,
318					      tb[TCA_TBF_RTAB],
319					      NULL));
320
321	if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE)
322			qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
323						      tb[TCA_TBF_PTAB],
324						      NULL));
325
326	buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
327	mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
328
329	if (tb[TCA_TBF_RATE64])
330		rate64 = nla_get_u64(tb[TCA_TBF_RATE64]);
331	psched_ratecfg_precompute(&rate, &qopt->rate, rate64);
332
333	if (tb[TCA_TBF_BURST]) {
334		max_size = nla_get_u32(tb[TCA_TBF_BURST]);
335		buffer = psched_l2t_ns(&rate, max_size);
336	} else {
337		max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U);
338	}
339
340	if (qopt->peakrate.rate) {
341		if (tb[TCA_TBF_PRATE64])
342			prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]);
343		psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64);
344		if (peak.rate_bytes_ps <= rate.rate_bytes_ps) {
345			pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n",
346					peak.rate_bytes_ps, rate.rate_bytes_ps);
347			err = -EINVAL;
348			goto done;
349		}
350
351		if (tb[TCA_TBF_PBURST]) {
352			u32 pburst = nla_get_u32(tb[TCA_TBF_PBURST]);
353			max_size = min_t(u32, max_size, pburst);
354			mtu = psched_l2t_ns(&peak, pburst);
355		} else {
356			max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu));
357		}
358	} else {
359		memset(&peak, 0, sizeof(peak));
 
 
 
 
360	}
361
362	if (max_size < psched_mtu(qdisc_dev(sch)))
363		pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n",
364				    max_size, qdisc_dev(sch)->name,
365				    psched_mtu(qdisc_dev(sch)));
366
367	if (!max_size) {
368		err = -EINVAL;
369		goto done;
370	}
371
372	if (q->qdisc != &noop_qdisc) {
373		err = fifo_set_limit(q->qdisc, qopt->limit);
374		if (err)
375			goto done;
376	} else if (qopt->limit > 0) {
377		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit,
378					 extack);
379		if (IS_ERR(child)) {
380			err = PTR_ERR(child);
381			goto done;
382		}
383
384		/* child is fifo, no need to check for noop_qdisc */
385		qdisc_hash_add(child, true);
386	}
387
388	sch_tree_lock(sch);
389	if (child) {
390		qdisc_tree_flush_backlog(q->qdisc);
391		qdisc_put(q->qdisc);
392		q->qdisc = child;
393	}
394	q->limit = qopt->limit;
395	if (tb[TCA_TBF_PBURST])
396		q->mtu = mtu;
397	else
398		q->mtu = PSCHED_TICKS2NS(qopt->mtu);
399	q->max_size = max_size;
400	if (tb[TCA_TBF_BURST])
401		q->buffer = buffer;
402	else
403		q->buffer = PSCHED_TICKS2NS(qopt->buffer);
404	q->tokens = q->buffer;
405	q->ptokens = q->mtu;
406
407	memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg));
408	memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg));
409
410	sch_tree_unlock(sch);
411	err = 0;
412done:
 
 
 
 
413	return err;
414}
415
416static int tbf_init(struct Qdisc *sch, struct nlattr *opt,
417		    struct netlink_ext_ack *extack)
418{
419	struct tbf_sched_data *q = qdisc_priv(sch);
420
 
 
 
 
421	qdisc_watchdog_init(&q->watchdog, sch);
422	q->qdisc = &noop_qdisc;
423
424	if (!opt)
425		return -EINVAL;
426
427	q->t_c = ktime_get_ns();
428
429	return tbf_change(sch, opt, extack);
430}
431
432static void tbf_destroy(struct Qdisc *sch)
433{
434	struct tbf_sched_data *q = qdisc_priv(sch);
435
436	qdisc_watchdog_cancel(&q->watchdog);
437	qdisc_put(q->qdisc);
 
 
 
 
 
 
438}
439
440static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
441{
442	struct tbf_sched_data *q = qdisc_priv(sch);
443	struct nlattr *nest;
444	struct tc_tbf_qopt opt;
445
446	sch->qstats.backlog = q->qdisc->qstats.backlog;
447	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
448	if (nest == NULL)
449		goto nla_put_failure;
450
451	opt.limit = q->limit;
452	psched_ratecfg_getrate(&opt.rate, &q->rate);
453	if (tbf_peak_present(q))
454		psched_ratecfg_getrate(&opt.peakrate, &q->peak);
455	else
456		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
457	opt.mtu = PSCHED_NS2TICKS(q->mtu);
458	opt.buffer = PSCHED_NS2TICKS(q->buffer);
459	if (nla_put(skb, TCA_TBF_PARMS, sizeof(opt), &opt))
460		goto nla_put_failure;
461	if (q->rate.rate_bytes_ps >= (1ULL << 32) &&
462	    nla_put_u64_64bit(skb, TCA_TBF_RATE64, q->rate.rate_bytes_ps,
463			      TCA_TBF_PAD))
464		goto nla_put_failure;
465	if (tbf_peak_present(q) &&
466	    q->peak.rate_bytes_ps >= (1ULL << 32) &&
467	    nla_put_u64_64bit(skb, TCA_TBF_PRATE64, q->peak.rate_bytes_ps,
468			      TCA_TBF_PAD))
469		goto nla_put_failure;
470
471	return nla_nest_end(skb, nest);
 
472
473nla_put_failure:
474	nla_nest_cancel(skb, nest);
475	return -1;
476}
477
478static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
479			  struct sk_buff *skb, struct tcmsg *tcm)
480{
481	struct tbf_sched_data *q = qdisc_priv(sch);
482
483	tcm->tcm_handle |= TC_H_MIN(1);
484	tcm->tcm_info = q->qdisc->handle;
485
486	return 0;
487}
488
489static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
490		     struct Qdisc **old, struct netlink_ext_ack *extack)
491{
492	struct tbf_sched_data *q = qdisc_priv(sch);
493
494	if (new == NULL)
495		new = &noop_qdisc;
496
497	*old = qdisc_replace(sch, new, &q->qdisc);
 
 
 
 
 
 
498	return 0;
499}
500
501static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
502{
503	struct tbf_sched_data *q = qdisc_priv(sch);
504	return q->qdisc;
505}
506
507static unsigned long tbf_find(struct Qdisc *sch, u32 classid)
508{
509	return 1;
510}
511
 
 
 
 
512static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
513{
514	if (!walker->stop) {
515		if (walker->count >= walker->skip)
516			if (walker->fn(sch, 1, walker) < 0) {
517				walker->stop = 1;
518				return;
519			}
520		walker->count++;
521	}
522}
523
524static const struct Qdisc_class_ops tbf_class_ops = {
525	.graft		=	tbf_graft,
526	.leaf		=	tbf_leaf,
527	.find		=	tbf_find,
 
528	.walk		=	tbf_walk,
529	.dump		=	tbf_dump_class,
530};
531
532static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
533	.next		=	NULL,
534	.cl_ops		=	&tbf_class_ops,
535	.id		=	"tbf",
536	.priv_size	=	sizeof(struct tbf_sched_data),
537	.enqueue	=	tbf_enqueue,
538	.dequeue	=	tbf_dequeue,
539	.peek		=	qdisc_peek_dequeued,
 
540	.init		=	tbf_init,
541	.reset		=	tbf_reset,
542	.destroy	=	tbf_destroy,
543	.change		=	tbf_change,
544	.dump		=	tbf_dump,
545	.owner		=	THIS_MODULE,
546};
547
548static int __init tbf_module_init(void)
549{
550	return register_qdisc(&tbf_qdisc_ops);
551}
552
553static void __exit tbf_module_exit(void)
554{
555	unregister_qdisc(&tbf_qdisc_ops);
556}
557module_init(tbf_module_init)
558module_exit(tbf_module_exit)
559MODULE_LICENSE("GPL");