Linux Audio

Check our new training course

Loading...
v6.13.7
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
  4 *
  5 * Description
  6 * -----------
  7 *
  8 * The Enhanced Transmission Selection scheduler is a classful queuing
  9 * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler.
 10 * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to
 11 * implement the transmission selection described in 802.1Qaz.
 12 *
 13 * Although ETS is technically classful, it's not possible to add and remove
 14 * classes at will. Instead one specifies number of classes, how many are
 15 * PRIO-like and how many DRR-like, and quanta for the latter.
 16 *
 17 * Algorithm
 18 * ---------
 19 *
 20 * The strict classes, if any, are tried for traffic first: first band 0, if it
 21 * has no traffic then band 1, etc.
 22 *
 23 * When there is no traffic in any of the strict queues, the bandwidth-sharing
 24 * ones are tried next. Each band is assigned a deficit counter, initialized to
 25 * "quantum" of that band. ETS maintains a list of active bandwidth-sharing
 26 * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
 27 * head of the list if the packet size is smaller or equal to the deficit
 28 * counter. If the counter is too small, it is increased by "quantum" and the
 29 * scheduler moves on to the next band in the active list.
 30 */
 31
#include <linux/math64.h>
#include <linux/module.h>
#include <net/gen_stats.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
 38
 39struct ets_class {
 40	struct list_head alist; /* In struct ets_sched.active. */
 41	struct Qdisc *qdisc;
 42	u32 quantum;
 43	u32 deficit;
 44	struct gnet_stats_basic_sync bstats;
 45	struct gnet_stats_queue qstats;
 46};
 47
 48struct ets_sched {
 49	struct list_head active;
 50	struct tcf_proto __rcu *filter_list;
 51	struct tcf_block *block;
 52	unsigned int nbands;
 53	unsigned int nstrict;
 54	u8 prio2band[TC_PRIO_MAX + 1];
 55	struct ets_class classes[TCQ_ETS_MAX_BANDS];
 56};
 57
 58static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
 59	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
 60	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
 61	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
 62	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
 63};
 64
 65static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
 66	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
 67};
 68
 69static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
 70	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
 71};
 72
 73static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
 74	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
 75};
 76
 77static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
 78			     unsigned int *quantum,
 79			     struct netlink_ext_ack *extack)
 80{
 81	*quantum = nla_get_u32(attr);
 82	if (!*quantum) {
 83		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
 84		return -EINVAL;
 85	}
 86	return 0;
 87}
 88
 89static struct ets_class *
 90ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
 91{
 92	struct ets_sched *q = qdisc_priv(sch);
 93
 94	if (arg == 0 || arg > q->nbands)
 95		return NULL;
 96	return &q->classes[arg - 1];
 97}
 98
 99static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
100{
101	struct ets_sched *q = qdisc_priv(sch);
102	int band = cl - q->classes;
103
104	return TC_H_MAKE(sch->handle, band + 1);
105}
106
107static void ets_offload_change(struct Qdisc *sch)
108{
109	struct net_device *dev = qdisc_dev(sch);
110	struct ets_sched *q = qdisc_priv(sch);
111	struct tc_ets_qopt_offload qopt;
112	unsigned int w_psum_prev = 0;
113	unsigned int q_psum = 0;
114	unsigned int q_sum = 0;
115	unsigned int quantum;
116	unsigned int w_psum;
117	unsigned int weight;
118	unsigned int i;
119
120	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
121		return;
122
123	qopt.command = TC_ETS_REPLACE;
124	qopt.handle = sch->handle;
125	qopt.parent = sch->parent;
126	qopt.replace_params.bands = q->nbands;
127	qopt.replace_params.qstats = &sch->qstats;
128	memcpy(&qopt.replace_params.priomap,
129	       q->prio2band, sizeof(q->prio2band));
130
131	for (i = 0; i < q->nbands; i++)
132		q_sum += q->classes[i].quantum;
133
134	for (i = 0; i < q->nbands; i++) {
135		quantum = q->classes[i].quantum;
136		q_psum += quantum;
137		w_psum = quantum ? q_psum * 100 / q_sum : 0;
138		weight = w_psum - w_psum_prev;
139		w_psum_prev = w_psum;
140
141		qopt.replace_params.quanta[i] = quantum;
142		qopt.replace_params.weights[i] = weight;
143	}
144
145	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
146}
147
148static void ets_offload_destroy(struct Qdisc *sch)
149{
150	struct net_device *dev = qdisc_dev(sch);
151	struct tc_ets_qopt_offload qopt;
152
153	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
154		return;
155
156	qopt.command = TC_ETS_DESTROY;
157	qopt.handle = sch->handle;
158	qopt.parent = sch->parent;
159	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
160}
161
162static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
163			      struct Qdisc *old, unsigned long arg,
164			      struct netlink_ext_ack *extack)
165{
166	struct net_device *dev = qdisc_dev(sch);
167	struct tc_ets_qopt_offload qopt;
168
169	qopt.command = TC_ETS_GRAFT;
170	qopt.handle = sch->handle;
171	qopt.parent = sch->parent;
172	qopt.graft_params.band = arg - 1;
173	qopt.graft_params.child_handle = new->handle;
174
175	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
176				   &qopt, extack);
177}
178
179static int ets_offload_dump(struct Qdisc *sch)
180{
181	struct tc_ets_qopt_offload qopt;
182
183	qopt.command = TC_ETS_STATS;
184	qopt.handle = sch->handle;
185	qopt.parent = sch->parent;
186	qopt.stats.bstats = &sch->bstats;
187	qopt.stats.qstats = &sch->qstats;
188
189	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
190}
191
192static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
193{
194	unsigned int band = cl - q->classes;
195
196	return band < q->nstrict;
197}
198
199static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
200			    struct nlattr **tca, unsigned long *arg,
201			    struct netlink_ext_ack *extack)
202{
203	struct ets_class *cl = ets_class_from_arg(sch, *arg);
204	struct ets_sched *q = qdisc_priv(sch);
205	struct nlattr *opt = tca[TCA_OPTIONS];
206	struct nlattr *tb[TCA_ETS_MAX + 1];
207	unsigned int quantum;
208	int err;
209
210	/* Classes can be added and removed only through Qdisc_ops.change
211	 * interface.
212	 */
213	if (!cl) {
214		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
215		return -EOPNOTSUPP;
216	}
217
218	if (!opt) {
219		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
220		return -EINVAL;
221	}
222
223	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
224	if (err < 0)
225		return err;
226
227	if (!tb[TCA_ETS_QUANTA_BAND])
228		/* Nothing to configure. */
229		return 0;
230
231	if (ets_class_is_strict(q, cl)) {
232		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
233		return -EINVAL;
234	}
235
236	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
237				extack);
238	if (err)
239		return err;
240
241	sch_tree_lock(sch);
242	cl->quantum = quantum;
243	sch_tree_unlock(sch);
244
245	ets_offload_change(sch);
246	return 0;
247}
248
249static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
250			   struct Qdisc *new, struct Qdisc **old,
251			   struct netlink_ext_ack *extack)
252{
253	struct ets_class *cl = ets_class_from_arg(sch, arg);
254
255	if (!new) {
256		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
257					ets_class_id(sch, cl), NULL);
258		if (!new)
259			new = &noop_qdisc;
260		else
261			qdisc_hash_add(new, true);
262	}
263
264	*old = qdisc_replace(sch, new, &cl->qdisc);
265	ets_offload_graft(sch, new, *old, arg, extack);
266	return 0;
267}
268
269static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
270{
271	struct ets_class *cl = ets_class_from_arg(sch, arg);
272
273	return cl->qdisc;
274}
275
276static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
277{
278	unsigned long band = TC_H_MIN(classid);
279	struct ets_sched *q = qdisc_priv(sch);
280
281	if (band - 1 >= q->nbands)
282		return 0;
283	return band;
284}
285
286static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
287{
288	struct ets_class *cl = ets_class_from_arg(sch, arg);
289	struct ets_sched *q = qdisc_priv(sch);
290
291	/* We get notified about zero-length child Qdiscs as well if they are
292	 * offloaded. Those aren't on the active list though, so don't attempt
293	 * to remove them.
294	 */
295	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
296		list_del(&cl->alist);
297}
298
299static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
300			  struct sk_buff *skb, struct tcmsg *tcm)
301{
302	struct ets_class *cl = ets_class_from_arg(sch, arg);
303	struct ets_sched *q = qdisc_priv(sch);
304	struct nlattr *nest;
305
306	tcm->tcm_parent = TC_H_ROOT;
307	tcm->tcm_handle = ets_class_id(sch, cl);
308	tcm->tcm_info = cl->qdisc->handle;
309
310	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
311	if (!nest)
312		goto nla_put_failure;
313	if (!ets_class_is_strict(q, cl)) {
314		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
315			goto nla_put_failure;
316	}
317	return nla_nest_end(skb, nest);
318
319nla_put_failure:
320	nla_nest_cancel(skb, nest);
321	return -EMSGSIZE;
322}
323
324static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
325				struct gnet_dump *d)
326{
327	struct ets_class *cl = ets_class_from_arg(sch, arg);
328	struct Qdisc *cl_q = cl->qdisc;
329
330	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
331	    qdisc_qstats_copy(d, cl_q) < 0)
332		return -1;
333
334	return 0;
335}
336
337static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
338{
339	struct ets_sched *q = qdisc_priv(sch);
340	int i;
341
342	if (arg->stop)
343		return;
344
345	for (i = 0; i < q->nbands; i++) {
346		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
347			break;
348	}
349}
350
351static struct tcf_block *
352ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
353		    struct netlink_ext_ack *extack)
354{
355	struct ets_sched *q = qdisc_priv(sch);
356
357	if (cl) {
358		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
359		return NULL;
360	}
361
362	return q->block;
363}
364
365static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
366					u32 classid)
367{
368	return ets_class_find(sch, classid);
369}
370
static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	/* ets_qdisc_bind_tcf() only looks the class up; nothing to undo. */
}
374
375static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
376				      int *qerr)
377{
378	struct ets_sched *q = qdisc_priv(sch);
379	u32 band = skb->priority;
380	struct tcf_result res;
381	struct tcf_proto *fl;
382	int err;
383
384	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
385	if (TC_H_MAJ(skb->priority) != sch->handle) {
386		fl = rcu_dereference_bh(q->filter_list);
387		err = tcf_classify(skb, NULL, fl, &res, false);
388#ifdef CONFIG_NET_CLS_ACT
389		switch (err) {
390		case TC_ACT_STOLEN:
391		case TC_ACT_QUEUED:
392		case TC_ACT_TRAP:
393			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
394			fallthrough;
395		case TC_ACT_SHOT:
396			return NULL;
397		}
398#endif
399		if (!fl || err < 0) {
400			if (TC_H_MAJ(band))
401				band = 0;
402			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
403		}
404		band = res.classid;
405	}
406	band = TC_H_MIN(band) - 1;
407	if (band >= q->nbands)
408		return &q->classes[q->prio2band[0]];
409	return &q->classes[band];
410}
411
412static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
413			     struct sk_buff **to_free)
414{
415	unsigned int len = qdisc_pkt_len(skb);
416	struct ets_sched *q = qdisc_priv(sch);
417	struct ets_class *cl;
418	int err = 0;
419	bool first;
420
421	cl = ets_classify(skb, sch, &err);
422	if (!cl) {
423		if (err & __NET_XMIT_BYPASS)
424			qdisc_qstats_drop(sch);
425		__qdisc_drop(skb, to_free);
426		return err;
427	}
428
429	first = !cl->qdisc->q.qlen;
430	err = qdisc_enqueue(skb, cl->qdisc, to_free);
431	if (unlikely(err != NET_XMIT_SUCCESS)) {
432		if (net_xmit_drop_count(err)) {
433			cl->qstats.drops++;
434			qdisc_qstats_drop(sch);
435		}
436		return err;
437	}
438
439	if (first && !ets_class_is_strict(q, cl)) {
440		list_add_tail(&cl->alist, &q->active);
441		cl->deficit = cl->quantum;
442	}
443
444	sch->qstats.backlog += len;
445	sch->q.qlen++;
446	return err;
447}
448
449static struct sk_buff *
450ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
451{
452	qdisc_bstats_update(sch, skb);
453	qdisc_qstats_backlog_dec(sch, skb);
454	sch->q.qlen--;
455	return skb;
456}
457
458static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
459{
460	struct ets_sched *q = qdisc_priv(sch);
461	struct ets_class *cl;
462	struct sk_buff *skb;
463	unsigned int band;
464	unsigned int len;
465
466	while (1) {
467		for (band = 0; band < q->nstrict; band++) {
468			cl = &q->classes[band];
469			skb = qdisc_dequeue_peeked(cl->qdisc);
470			if (skb)
471				return ets_qdisc_dequeue_skb(sch, skb);
472		}
473
474		if (list_empty(&q->active))
475			goto out;
476
477		cl = list_first_entry(&q->active, struct ets_class, alist);
478		skb = cl->qdisc->ops->peek(cl->qdisc);
479		if (!skb) {
480			qdisc_warn_nonwc(__func__, cl->qdisc);
481			goto out;
482		}
483
484		len = qdisc_pkt_len(skb);
485		if (len <= cl->deficit) {
486			cl->deficit -= len;
487			skb = qdisc_dequeue_peeked(cl->qdisc);
488			if (unlikely(!skb))
489				goto out;
490			if (cl->qdisc->q.qlen == 0)
491				list_del(&cl->alist);
492			return ets_qdisc_dequeue_skb(sch, skb);
493		}
494
495		cl->deficit += cl->quantum;
496		list_move_tail(&cl->alist, &q->active);
497	}
498out:
499	return NULL;
500}
501
502static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
503				   unsigned int nbands, u8 *priomap,
504				   struct netlink_ext_ack *extack)
505{
506	const struct nlattr *attr;
507	int prio = 0;
508	u8 band;
509	int rem;
510	int err;
511
512	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
513				    ets_priomap_policy, NL_VALIDATE_STRICT,
514				    extack);
515	if (err)
516		return err;
517
518	nla_for_each_nested(attr, priomap_attr, rem) {
519		switch (nla_type(attr)) {
520		case TCA_ETS_PRIOMAP_BAND:
521			if (prio > TC_PRIO_MAX) {
522				NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
523				return -EINVAL;
524			}
525			band = nla_get_u8(attr);
526			if (band >= nbands) {
527				NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
528				return -EINVAL;
529			}
530			priomap[prio++] = band;
531			break;
532		default:
533			WARN_ON_ONCE(1); /* Validate should have caught this. */
534			return -EINVAL;
535		}
536	}
537
538	return 0;
539}
540
541static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
542				  unsigned int nbands, unsigned int nstrict,
543				  unsigned int *quanta,
544				  struct netlink_ext_ack *extack)
545{
546	const struct nlattr *attr;
547	int band = nstrict;
548	int rem;
549	int err;
550
551	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
552				    ets_quanta_policy, NL_VALIDATE_STRICT,
553				    extack);
554	if (err < 0)
555		return err;
556
557	nla_for_each_nested(attr, quanta_attr, rem) {
558		switch (nla_type(attr)) {
559		case TCA_ETS_QUANTA_BAND:
560			if (band >= nbands) {
561				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
562				return -EINVAL;
563			}
564			err = ets_quantum_parse(sch, attr, &quanta[band++],
565						extack);
566			if (err)
567				return err;
568			break;
569		default:
570			WARN_ON_ONCE(1); /* Validate should have caught this. */
571			return -EINVAL;
572		}
573	}
574
575	return 0;
576}
577
578static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
579			    struct netlink_ext_ack *extack)
580{
581	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
582	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
583	struct ets_sched *q = qdisc_priv(sch);
584	struct nlattr *tb[TCA_ETS_MAX + 1];
585	unsigned int oldbands = q->nbands;
586	u8 priomap[TC_PRIO_MAX + 1];
587	unsigned int nstrict = 0;
588	unsigned int nbands;
589	unsigned int i;
590	int err;
591
592	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
593	if (err < 0)
594		return err;
595
596	if (!tb[TCA_ETS_NBANDS]) {
597		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
598		return -EINVAL;
599	}
600	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
601	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
602		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
603		return -EINVAL;
604	}
605	/* Unless overridden, traffic goes to the last band. */
606	memset(priomap, nbands - 1, sizeof(priomap));
607
608	if (tb[TCA_ETS_NSTRICT]) {
609		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
610		if (nstrict > nbands) {
611			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
612			return -EINVAL;
613		}
614	}
615
616	if (tb[TCA_ETS_PRIOMAP]) {
617		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
618					      nbands, priomap, extack);
619		if (err)
620			return err;
621	}
622
623	if (tb[TCA_ETS_QUANTA]) {
624		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
625					     nbands, nstrict, quanta, extack);
626		if (err)
627			return err;
628	}
629	/* If there are more bands than strict + quanta provided, the remaining
630	 * ones are ETS with quantum of MTU. Initialize the missing values here.
631	 */
632	for (i = nstrict; i < nbands; i++) {
633		if (!quanta[i])
634			quanta[i] = psched_mtu(qdisc_dev(sch));
635	}
636
637	/* Before commit, make sure we can allocate all new qdiscs */
638	for (i = oldbands; i < nbands; i++) {
639		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
640					      ets_class_id(sch, &q->classes[i]),
641					      extack);
642		if (!queues[i]) {
643			while (i > oldbands)
644				qdisc_put(queues[--i]);
645			return -ENOMEM;
646		}
647	}
648
649	sch_tree_lock(sch);
650
651	WRITE_ONCE(q->nbands, nbands);
652	for (i = nstrict; i < q->nstrict; i++) {
653		if (q->classes[i].qdisc->q.qlen) {
654			list_add_tail(&q->classes[i].alist, &q->active);
655			q->classes[i].deficit = quanta[i];
656		}
657	}
658	for (i = q->nbands; i < oldbands; i++) {
659		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
660			list_del(&q->classes[i].alist);
661		qdisc_tree_flush_backlog(q->classes[i].qdisc);
662	}
663	WRITE_ONCE(q->nstrict, nstrict);
664	memcpy(q->prio2band, priomap, sizeof(priomap));
665
666	for (i = 0; i < q->nbands; i++)
667		WRITE_ONCE(q->classes[i].quantum, quanta[i]);
668
669	for (i = oldbands; i < q->nbands; i++) {
670		q->classes[i].qdisc = queues[i];
671		if (q->classes[i].qdisc != &noop_qdisc)
672			qdisc_hash_add(q->classes[i].qdisc, true);
673	}
674
675	sch_tree_unlock(sch);
676
677	ets_offload_change(sch);
678	for (i = q->nbands; i < oldbands; i++) {
679		qdisc_put(q->classes[i].qdisc);
680		q->classes[i].qdisc = NULL;
681		WRITE_ONCE(q->classes[i].quantum, 0);
682		q->classes[i].deficit = 0;
683		gnet_stats_basic_sync_init(&q->classes[i].bstats);
684		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
685	}
686	return 0;
687}
688
689static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
690			  struct netlink_ext_ack *extack)
691{
692	struct ets_sched *q = qdisc_priv(sch);
693	int err, i;
694
695	if (!opt)
696		return -EINVAL;
697
698	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
699	if (err)
700		return err;
701
702	INIT_LIST_HEAD(&q->active);
703	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
704		INIT_LIST_HEAD(&q->classes[i].alist);
705
706	return ets_qdisc_change(sch, opt, extack);
707}
708
709static void ets_qdisc_reset(struct Qdisc *sch)
710{
711	struct ets_sched *q = qdisc_priv(sch);
712	int band;
713
714	for (band = q->nstrict; band < q->nbands; band++) {
715		if (q->classes[band].qdisc->q.qlen)
716			list_del(&q->classes[band].alist);
717	}
718	for (band = 0; band < q->nbands; band++)
719		qdisc_reset(q->classes[band].qdisc);
720}
721
722static void ets_qdisc_destroy(struct Qdisc *sch)
723{
724	struct ets_sched *q = qdisc_priv(sch);
725	int band;
726
727	ets_offload_destroy(sch);
728	tcf_block_put(q->block);
729	for (band = 0; band < q->nbands; band++)
730		qdisc_put(q->classes[band].qdisc);
731}
732
733static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
734{
735	struct ets_sched *q = qdisc_priv(sch);
736	struct nlattr *opts;
737	struct nlattr *nest;
738	u8 nbands, nstrict;
739	int band;
740	int prio;
741	int err;
742
743	err = ets_offload_dump(sch);
744	if (err)
745		return err;
746
747	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
748	if (!opts)
749		goto nla_err;
750
751	nbands = READ_ONCE(q->nbands);
752	if (nla_put_u8(skb, TCA_ETS_NBANDS, nbands))
753		goto nla_err;
754
755	nstrict = READ_ONCE(q->nstrict);
756	if (nstrict && nla_put_u8(skb, TCA_ETS_NSTRICT, nstrict))
757		goto nla_err;
758
759	if (nbands > nstrict) {
760		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
761		if (!nest)
762			goto nla_err;
763
764		for (band = nstrict; band < nbands; band++) {
765			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
766					READ_ONCE(q->classes[band].quantum)))
767				goto nla_err;
768		}
769
770		nla_nest_end(skb, nest);
771	}
772
773	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
774	if (!nest)
775		goto nla_err;
776
777	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
778		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND,
779			       READ_ONCE(q->prio2band[prio])))
780			goto nla_err;
781	}
782
783	nla_nest_end(skb, nest);
784
785	return nla_nest_end(skb, opts);
786
787nla_err:
788	nla_nest_cancel(skb, opts);
789	return -EMSGSIZE;
790}
791
792static const struct Qdisc_class_ops ets_class_ops = {
793	.change		= ets_class_change,
794	.graft		= ets_class_graft,
795	.leaf		= ets_class_leaf,
796	.find		= ets_class_find,
797	.qlen_notify	= ets_class_qlen_notify,
798	.dump		= ets_class_dump,
799	.dump_stats	= ets_class_dump_stats,
800	.walk		= ets_qdisc_walk,
801	.tcf_block	= ets_qdisc_tcf_block,
802	.bind_tcf	= ets_qdisc_bind_tcf,
803	.unbind_tcf	= ets_qdisc_unbind_tcf,
804};
805
806static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
807	.cl_ops		= &ets_class_ops,
808	.id		= "ets",
809	.priv_size	= sizeof(struct ets_sched),
810	.enqueue	= ets_qdisc_enqueue,
811	.dequeue	= ets_qdisc_dequeue,
812	.peek		= qdisc_peek_dequeued,
813	.change		= ets_qdisc_change,
814	.init		= ets_qdisc_init,
815	.reset		= ets_qdisc_reset,
816	.destroy	= ets_qdisc_destroy,
817	.dump		= ets_qdisc_dump,
818	.owner		= THIS_MODULE,
819};
820MODULE_ALIAS_NET_SCH("ets");
821
822static int __init ets_init(void)
823{
824	return register_qdisc(&ets_qdisc_ops);
825}
826
827static void __exit ets_exit(void)
828{
829	unregister_qdisc(&ets_qdisc_ops);
830}
831
832module_init(ets_init);
833module_exit(ets_exit);
834MODULE_LICENSE("GPL");
835MODULE_DESCRIPTION("Enhanced Transmission Selection(ETS) scheduler");
v6.2
  1// SPDX-License-Identifier: GPL-2.0-only
  2/*
  3 * net/sched/sch_ets.c         Enhanced Transmission Selection scheduler
  4 *
  5 * Description
  6 * -----------
  7 *
  8 * The Enhanced Transmission Selection scheduler is a classful queuing
  9 * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler.
 10 * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to
 11 * implement the transmission selection described in 802.1Qaz.
 12 *
 13 * Although ETS is technically classful, it's not possible to add and remove
 14 * classes at will. Instead one specifies number of classes, how many are
 15 * PRIO-like and how many DRR-like, and quanta for the latter.
 16 *
 17 * Algorithm
 18 * ---------
 19 *
 20 * The strict classes, if any, are tried for traffic first: first band 0, if it
 21 * has no traffic then band 1, etc.
 22 *
 23 * When there is no traffic in any of the strict queues, the bandwidth-sharing
 24 * ones are tried next. Each band is assigned a deficit counter, initialized to
 25 * "quantum" of that band. ETS maintains a list of active bandwidth-sharing
 26 * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
 27 * head of the list if the packet size is smaller or equal to the deficit
 28 * counter. If the counter is too small, it is increased by "quantum" and the
 29 * scheduler moves on to the next band in the active list.
 30 */
 31
 32#include <linux/module.h>
 33#include <net/gen_stats.h>
 34#include <net/netlink.h>
 35#include <net/pkt_cls.h>
 36#include <net/pkt_sched.h>
 37#include <net/sch_generic.h>
 38
 39struct ets_class {
 40	struct list_head alist; /* In struct ets_sched.active. */
 41	struct Qdisc *qdisc;
 42	u32 quantum;
 43	u32 deficit;
 44	struct gnet_stats_basic_sync bstats;
 45	struct gnet_stats_queue qstats;
 46};
 47
 48struct ets_sched {
 49	struct list_head active;
 50	struct tcf_proto __rcu *filter_list;
 51	struct tcf_block *block;
 52	unsigned int nbands;
 53	unsigned int nstrict;
 54	u8 prio2band[TC_PRIO_MAX + 1];
 55	struct ets_class classes[TCQ_ETS_MAX_BANDS];
 56};
 57
 58static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
 59	[TCA_ETS_NBANDS] = { .type = NLA_U8 },
 60	[TCA_ETS_NSTRICT] = { .type = NLA_U8 },
 61	[TCA_ETS_QUANTA] = { .type = NLA_NESTED },
 62	[TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
 63};
 64
 65static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
 66	[TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
 67};
 68
 69static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
 70	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
 71};
 72
 73static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
 74	[TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
 75};
 76
 77static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
 78			     unsigned int *quantum,
 79			     struct netlink_ext_ack *extack)
 80{
 81	*quantum = nla_get_u32(attr);
 82	if (!*quantum) {
 83		NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
 84		return -EINVAL;
 85	}
 86	return 0;
 87}
 88
 89static struct ets_class *
 90ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
 91{
 92	struct ets_sched *q = qdisc_priv(sch);
 93
 
 
 94	return &q->classes[arg - 1];
 95}
 96
 97static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
 98{
 99	struct ets_sched *q = qdisc_priv(sch);
100	int band = cl - q->classes;
101
102	return TC_H_MAKE(sch->handle, band + 1);
103}
104
105static void ets_offload_change(struct Qdisc *sch)
106{
107	struct net_device *dev = qdisc_dev(sch);
108	struct ets_sched *q = qdisc_priv(sch);
109	struct tc_ets_qopt_offload qopt;
110	unsigned int w_psum_prev = 0;
111	unsigned int q_psum = 0;
112	unsigned int q_sum = 0;
113	unsigned int quantum;
114	unsigned int w_psum;
115	unsigned int weight;
116	unsigned int i;
117
118	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
119		return;
120
121	qopt.command = TC_ETS_REPLACE;
122	qopt.handle = sch->handle;
123	qopt.parent = sch->parent;
124	qopt.replace_params.bands = q->nbands;
125	qopt.replace_params.qstats = &sch->qstats;
126	memcpy(&qopt.replace_params.priomap,
127	       q->prio2band, sizeof(q->prio2band));
128
129	for (i = 0; i < q->nbands; i++)
130		q_sum += q->classes[i].quantum;
131
132	for (i = 0; i < q->nbands; i++) {
133		quantum = q->classes[i].quantum;
134		q_psum += quantum;
135		w_psum = quantum ? q_psum * 100 / q_sum : 0;
136		weight = w_psum - w_psum_prev;
137		w_psum_prev = w_psum;
138
139		qopt.replace_params.quanta[i] = quantum;
140		qopt.replace_params.weights[i] = weight;
141	}
142
143	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
144}
145
146static void ets_offload_destroy(struct Qdisc *sch)
147{
148	struct net_device *dev = qdisc_dev(sch);
149	struct tc_ets_qopt_offload qopt;
150
151	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
152		return;
153
154	qopt.command = TC_ETS_DESTROY;
155	qopt.handle = sch->handle;
156	qopt.parent = sch->parent;
157	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
158}
159
160static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
161			      struct Qdisc *old, unsigned long arg,
162			      struct netlink_ext_ack *extack)
163{
164	struct net_device *dev = qdisc_dev(sch);
165	struct tc_ets_qopt_offload qopt;
166
167	qopt.command = TC_ETS_GRAFT;
168	qopt.handle = sch->handle;
169	qopt.parent = sch->parent;
170	qopt.graft_params.band = arg - 1;
171	qopt.graft_params.child_handle = new->handle;
172
173	qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
174				   &qopt, extack);
175}
176
177static int ets_offload_dump(struct Qdisc *sch)
178{
179	struct tc_ets_qopt_offload qopt;
180
181	qopt.command = TC_ETS_STATS;
182	qopt.handle = sch->handle;
183	qopt.parent = sch->parent;
184	qopt.stats.bstats = &sch->bstats;
185	qopt.stats.qstats = &sch->qstats;
186
187	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
188}
189
190static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
191{
192	unsigned int band = cl - q->classes;
193
194	return band < q->nstrict;
195}
196
197static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
198			    struct nlattr **tca, unsigned long *arg,
199			    struct netlink_ext_ack *extack)
200{
201	struct ets_class *cl = ets_class_from_arg(sch, *arg);
202	struct ets_sched *q = qdisc_priv(sch);
203	struct nlattr *opt = tca[TCA_OPTIONS];
204	struct nlattr *tb[TCA_ETS_MAX + 1];
205	unsigned int quantum;
206	int err;
207
208	/* Classes can be added and removed only through Qdisc_ops.change
209	 * interface.
210	 */
211	if (!cl) {
212		NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
213		return -EOPNOTSUPP;
214	}
215
216	if (!opt) {
217		NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
218		return -EINVAL;
219	}
220
221	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
222	if (err < 0)
223		return err;
224
225	if (!tb[TCA_ETS_QUANTA_BAND])
226		/* Nothing to configure. */
227		return 0;
228
229	if (ets_class_is_strict(q, cl)) {
230		NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
231		return -EINVAL;
232	}
233
234	err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
235				extack);
236	if (err)
237		return err;
238
239	sch_tree_lock(sch);
240	cl->quantum = quantum;
241	sch_tree_unlock(sch);
242
243	ets_offload_change(sch);
244	return 0;
245}
246
247static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
248			   struct Qdisc *new, struct Qdisc **old,
249			   struct netlink_ext_ack *extack)
250{
251	struct ets_class *cl = ets_class_from_arg(sch, arg);
252
253	if (!new) {
254		new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
255					ets_class_id(sch, cl), NULL);
256		if (!new)
257			new = &noop_qdisc;
258		else
259			qdisc_hash_add(new, true);
260	}
261
262	*old = qdisc_replace(sch, new, &cl->qdisc);
263	ets_offload_graft(sch, new, *old, arg, extack);
264	return 0;
265}
266
267static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
268{
269	struct ets_class *cl = ets_class_from_arg(sch, arg);
270
271	return cl->qdisc;
272}
273
274static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
275{
276	unsigned long band = TC_H_MIN(classid);
277	struct ets_sched *q = qdisc_priv(sch);
278
279	if (band - 1 >= q->nbands)
280		return 0;
281	return band;
282}
283
284static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
285{
286	struct ets_class *cl = ets_class_from_arg(sch, arg);
287	struct ets_sched *q = qdisc_priv(sch);
288
289	/* We get notified about zero-length child Qdiscs as well if they are
290	 * offloaded. Those aren't on the active list though, so don't attempt
291	 * to remove them.
292	 */
293	if (!ets_class_is_strict(q, cl) && sch->q.qlen)
294		list_del(&cl->alist);
295}
296
297static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
298			  struct sk_buff *skb, struct tcmsg *tcm)
299{
300	struct ets_class *cl = ets_class_from_arg(sch, arg);
301	struct ets_sched *q = qdisc_priv(sch);
302	struct nlattr *nest;
303
304	tcm->tcm_parent = TC_H_ROOT;
305	tcm->tcm_handle = ets_class_id(sch, cl);
306	tcm->tcm_info = cl->qdisc->handle;
307
308	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
309	if (!nest)
310		goto nla_put_failure;
311	if (!ets_class_is_strict(q, cl)) {
312		if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
313			goto nla_put_failure;
314	}
315	return nla_nest_end(skb, nest);
316
317nla_put_failure:
318	nla_nest_cancel(skb, nest);
319	return -EMSGSIZE;
320}
321
322static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
323				struct gnet_dump *d)
324{
325	struct ets_class *cl = ets_class_from_arg(sch, arg);
326	struct Qdisc *cl_q = cl->qdisc;
327
328	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
329	    qdisc_qstats_copy(d, cl_q) < 0)
330		return -1;
331
332	return 0;
333}
334
335static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
336{
337	struct ets_sched *q = qdisc_priv(sch);
338	int i;
339
340	if (arg->stop)
341		return;
342
343	for (i = 0; i < q->nbands; i++) {
344		if (!tc_qdisc_stats_dump(sch, i + 1, arg))
345			break;
346	}
347}
348
349static struct tcf_block *
350ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
351		    struct netlink_ext_ack *extack)
352{
353	struct ets_sched *q = qdisc_priv(sch);
354
355	if (cl) {
356		NL_SET_ERR_MSG(extack, "ETS classid must be zero");
357		return NULL;
358	}
359
360	return q->block;
361}
362
363static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
364					u32 classid)
365{
366	return ets_class_find(sch, classid);
367}
368
/* Counterpart of ets_qdisc_bind_tcf(); intentionally does nothing. */
static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
{
	/* Nothing to undo. */
}
372
373static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
374				      int *qerr)
375{
376	struct ets_sched *q = qdisc_priv(sch);
377	u32 band = skb->priority;
378	struct tcf_result res;
379	struct tcf_proto *fl;
380	int err;
381
382	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
383	if (TC_H_MAJ(skb->priority) != sch->handle) {
384		fl = rcu_dereference_bh(q->filter_list);
385		err = tcf_classify(skb, NULL, fl, &res, false);
386#ifdef CONFIG_NET_CLS_ACT
387		switch (err) {
388		case TC_ACT_STOLEN:
389		case TC_ACT_QUEUED:
390		case TC_ACT_TRAP:
391			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
392			fallthrough;
393		case TC_ACT_SHOT:
394			return NULL;
395		}
396#endif
397		if (!fl || err < 0) {
398			if (TC_H_MAJ(band))
399				band = 0;
400			return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
401		}
402		band = res.classid;
403	}
404	band = TC_H_MIN(band) - 1;
405	if (band >= q->nbands)
406		return &q->classes[q->prio2band[0]];
407	return &q->classes[band];
408}
409
410static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
411			     struct sk_buff **to_free)
412{
413	unsigned int len = qdisc_pkt_len(skb);
414	struct ets_sched *q = qdisc_priv(sch);
415	struct ets_class *cl;
416	int err = 0;
417	bool first;
418
419	cl = ets_classify(skb, sch, &err);
420	if (!cl) {
421		if (err & __NET_XMIT_BYPASS)
422			qdisc_qstats_drop(sch);
423		__qdisc_drop(skb, to_free);
424		return err;
425	}
426
427	first = !cl->qdisc->q.qlen;
428	err = qdisc_enqueue(skb, cl->qdisc, to_free);
429	if (unlikely(err != NET_XMIT_SUCCESS)) {
430		if (net_xmit_drop_count(err)) {
431			cl->qstats.drops++;
432			qdisc_qstats_drop(sch);
433		}
434		return err;
435	}
436
437	if (first && !ets_class_is_strict(q, cl)) {
438		list_add_tail(&cl->alist, &q->active);
439		cl->deficit = cl->quantum;
440	}
441
442	sch->qstats.backlog += len;
443	sch->q.qlen++;
444	return err;
445}
446
447static struct sk_buff *
448ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
449{
450	qdisc_bstats_update(sch, skb);
451	qdisc_qstats_backlog_dec(sch, skb);
452	sch->q.qlen--;
453	return skb;
454}
455
456static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
457{
458	struct ets_sched *q = qdisc_priv(sch);
459	struct ets_class *cl;
460	struct sk_buff *skb;
461	unsigned int band;
462	unsigned int len;
463
464	while (1) {
465		for (band = 0; band < q->nstrict; band++) {
466			cl = &q->classes[band];
467			skb = qdisc_dequeue_peeked(cl->qdisc);
468			if (skb)
469				return ets_qdisc_dequeue_skb(sch, skb);
470		}
471
472		if (list_empty(&q->active))
473			goto out;
474
475		cl = list_first_entry(&q->active, struct ets_class, alist);
476		skb = cl->qdisc->ops->peek(cl->qdisc);
477		if (!skb) {
478			qdisc_warn_nonwc(__func__, cl->qdisc);
479			goto out;
480		}
481
482		len = qdisc_pkt_len(skb);
483		if (len <= cl->deficit) {
484			cl->deficit -= len;
485			skb = qdisc_dequeue_peeked(cl->qdisc);
486			if (unlikely(!skb))
487				goto out;
488			if (cl->qdisc->q.qlen == 0)
489				list_del(&cl->alist);
490			return ets_qdisc_dequeue_skb(sch, skb);
491		}
492
493		cl->deficit += cl->quantum;
494		list_move_tail(&cl->alist, &q->active);
495	}
496out:
497	return NULL;
498}
499
500static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
501				   unsigned int nbands, u8 *priomap,
502				   struct netlink_ext_ack *extack)
503{
504	const struct nlattr *attr;
505	int prio = 0;
506	u8 band;
507	int rem;
508	int err;
509
510	err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
511				    ets_priomap_policy, NL_VALIDATE_STRICT,
512				    extack);
513	if (err)
514		return err;
515
516	nla_for_each_nested(attr, priomap_attr, rem) {
517		switch (nla_type(attr)) {
518		case TCA_ETS_PRIOMAP_BAND:
519			if (prio > TC_PRIO_MAX) {
520				NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
521				return -EINVAL;
522			}
523			band = nla_get_u8(attr);
524			if (band >= nbands) {
525				NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
526				return -EINVAL;
527			}
528			priomap[prio++] = band;
529			break;
530		default:
531			WARN_ON_ONCE(1); /* Validate should have caught this. */
532			return -EINVAL;
533		}
534	}
535
536	return 0;
537}
538
539static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
540				  unsigned int nbands, unsigned int nstrict,
541				  unsigned int *quanta,
542				  struct netlink_ext_ack *extack)
543{
544	const struct nlattr *attr;
545	int band = nstrict;
546	int rem;
547	int err;
548
549	err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
550				    ets_quanta_policy, NL_VALIDATE_STRICT,
551				    extack);
552	if (err < 0)
553		return err;
554
555	nla_for_each_nested(attr, quanta_attr, rem) {
556		switch (nla_type(attr)) {
557		case TCA_ETS_QUANTA_BAND:
558			if (band >= nbands) {
559				NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
560				return -EINVAL;
561			}
562			err = ets_quantum_parse(sch, attr, &quanta[band++],
563						extack);
564			if (err)
565				return err;
566			break;
567		default:
568			WARN_ON_ONCE(1); /* Validate should have caught this. */
569			return -EINVAL;
570		}
571	}
572
573	return 0;
574}
575
576static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
577			    struct netlink_ext_ack *extack)
578{
579	unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
580	struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
581	struct ets_sched *q = qdisc_priv(sch);
582	struct nlattr *tb[TCA_ETS_MAX + 1];
583	unsigned int oldbands = q->nbands;
584	u8 priomap[TC_PRIO_MAX + 1];
585	unsigned int nstrict = 0;
586	unsigned int nbands;
587	unsigned int i;
588	int err;
589
590	err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
591	if (err < 0)
592		return err;
593
594	if (!tb[TCA_ETS_NBANDS]) {
595		NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
596		return -EINVAL;
597	}
598	nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
599	if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
600		NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
601		return -EINVAL;
602	}
603	/* Unless overridden, traffic goes to the last band. */
604	memset(priomap, nbands - 1, sizeof(priomap));
605
606	if (tb[TCA_ETS_NSTRICT]) {
607		nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
608		if (nstrict > nbands) {
609			NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
610			return -EINVAL;
611		}
612	}
613
614	if (tb[TCA_ETS_PRIOMAP]) {
615		err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
616					      nbands, priomap, extack);
617		if (err)
618			return err;
619	}
620
621	if (tb[TCA_ETS_QUANTA]) {
622		err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
623					     nbands, nstrict, quanta, extack);
624		if (err)
625			return err;
626	}
627	/* If there are more bands than strict + quanta provided, the remaining
628	 * ones are ETS with quantum of MTU. Initialize the missing values here.
629	 */
630	for (i = nstrict; i < nbands; i++) {
631		if (!quanta[i])
632			quanta[i] = psched_mtu(qdisc_dev(sch));
633	}
634
635	/* Before commit, make sure we can allocate all new qdiscs */
636	for (i = oldbands; i < nbands; i++) {
637		queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
638					      ets_class_id(sch, &q->classes[i]),
639					      extack);
640		if (!queues[i]) {
641			while (i > oldbands)
642				qdisc_put(queues[--i]);
643			return -ENOMEM;
644		}
645	}
646
647	sch_tree_lock(sch);
648
649	q->nbands = nbands;
650	for (i = nstrict; i < q->nstrict; i++) {
651		if (q->classes[i].qdisc->q.qlen) {
652			list_add_tail(&q->classes[i].alist, &q->active);
653			q->classes[i].deficit = quanta[i];
654		}
655	}
656	for (i = q->nbands; i < oldbands; i++) {
657		if (i >= q->nstrict && q->classes[i].qdisc->q.qlen)
658			list_del(&q->classes[i].alist);
659		qdisc_tree_flush_backlog(q->classes[i].qdisc);
660	}
661	q->nstrict = nstrict;
662	memcpy(q->prio2band, priomap, sizeof(priomap));
663
664	for (i = 0; i < q->nbands; i++)
665		q->classes[i].quantum = quanta[i];
666
667	for (i = oldbands; i < q->nbands; i++) {
668		q->classes[i].qdisc = queues[i];
669		if (q->classes[i].qdisc != &noop_qdisc)
670			qdisc_hash_add(q->classes[i].qdisc, true);
671	}
672
673	sch_tree_unlock(sch);
674
675	ets_offload_change(sch);
676	for (i = q->nbands; i < oldbands; i++) {
677		qdisc_put(q->classes[i].qdisc);
678		q->classes[i].qdisc = NULL;
679		q->classes[i].quantum = 0;
680		q->classes[i].deficit = 0;
681		gnet_stats_basic_sync_init(&q->classes[i].bstats);
682		memset(&q->classes[i].qstats, 0, sizeof(q->classes[i].qstats));
683	}
684	return 0;
685}
686
687static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
688			  struct netlink_ext_ack *extack)
689{
690	struct ets_sched *q = qdisc_priv(sch);
691	int err, i;
692
693	if (!opt)
694		return -EINVAL;
695
696	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
697	if (err)
698		return err;
699
700	INIT_LIST_HEAD(&q->active);
701	for (i = 0; i < TCQ_ETS_MAX_BANDS; i++)
702		INIT_LIST_HEAD(&q->classes[i].alist);
703
704	return ets_qdisc_change(sch, opt, extack);
705}
706
707static void ets_qdisc_reset(struct Qdisc *sch)
708{
709	struct ets_sched *q = qdisc_priv(sch);
710	int band;
711
712	for (band = q->nstrict; band < q->nbands; band++) {
713		if (q->classes[band].qdisc->q.qlen)
714			list_del(&q->classes[band].alist);
715	}
716	for (band = 0; band < q->nbands; band++)
717		qdisc_reset(q->classes[band].qdisc);
718}
719
720static void ets_qdisc_destroy(struct Qdisc *sch)
721{
722	struct ets_sched *q = qdisc_priv(sch);
723	int band;
724
725	ets_offload_destroy(sch);
726	tcf_block_put(q->block);
727	for (band = 0; band < q->nbands; band++)
728		qdisc_put(q->classes[band].qdisc);
729}
730
731static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
732{
733	struct ets_sched *q = qdisc_priv(sch);
734	struct nlattr *opts;
735	struct nlattr *nest;
 
736	int band;
737	int prio;
738	int err;
739
740	err = ets_offload_dump(sch);
741	if (err)
742		return err;
743
744	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
745	if (!opts)
746		goto nla_err;
747
748	if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
 
749		goto nla_err;
750
751	if (q->nstrict &&
752	    nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
753		goto nla_err;
754
755	if (q->nbands > q->nstrict) {
756		nest = nla_nest_start(skb, TCA_ETS_QUANTA);
757		if (!nest)
758			goto nla_err;
759
760		for (band = q->nstrict; band < q->nbands; band++) {
761			if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
762					q->classes[band].quantum))
763				goto nla_err;
764		}
765
766		nla_nest_end(skb, nest);
767	}
768
769	nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
770	if (!nest)
771		goto nla_err;
772
773	for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
774		if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
 
775			goto nla_err;
776	}
777
778	nla_nest_end(skb, nest);
779
780	return nla_nest_end(skb, opts);
781
782nla_err:
783	nla_nest_cancel(skb, opts);
784	return -EMSGSIZE;
785}
786
787static const struct Qdisc_class_ops ets_class_ops = {
788	.change		= ets_class_change,
789	.graft		= ets_class_graft,
790	.leaf		= ets_class_leaf,
791	.find		= ets_class_find,
792	.qlen_notify	= ets_class_qlen_notify,
793	.dump		= ets_class_dump,
794	.dump_stats	= ets_class_dump_stats,
795	.walk		= ets_qdisc_walk,
796	.tcf_block	= ets_qdisc_tcf_block,
797	.bind_tcf	= ets_qdisc_bind_tcf,
798	.unbind_tcf	= ets_qdisc_unbind_tcf,
799};
800
801static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
802	.cl_ops		= &ets_class_ops,
803	.id		= "ets",
804	.priv_size	= sizeof(struct ets_sched),
805	.enqueue	= ets_qdisc_enqueue,
806	.dequeue	= ets_qdisc_dequeue,
807	.peek		= qdisc_peek_dequeued,
808	.change		= ets_qdisc_change,
809	.init		= ets_qdisc_init,
810	.reset		= ets_qdisc_reset,
811	.destroy	= ets_qdisc_destroy,
812	.dump		= ets_qdisc_dump,
813	.owner		= THIS_MODULE,
814};
 
815
816static int __init ets_init(void)
817{
818	return register_qdisc(&ets_qdisc_ops);
819}
820
821static void __exit ets_exit(void)
822{
823	unregister_qdisc(&ets_qdisc_ops);
824}
825
826module_init(ets_init);
827module_exit(ets_exit);
828MODULE_LICENSE("GPL");