sch_cbq.c - net/sched/sch_cbq.c - Linux diff v3.15 - Bootlin Elixir Cross Referencer

   1/*
   2 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/slab.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <net/netlink.h>
  21#include <net/pkt_sched.h>
  22
  23
  24/*	Class-Based Queueing (CBQ) algorithm.
  25	=======================================
  26
  27	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28		 Management Models for Packet Networks",
  29		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34		 Parameters", 1996
  35
  36		 [4] Sally Floyd and Michael Speer, "Experimental Results
  37		 for Class-Based Queueing", 1998, not published.
  38
  39	-----------------------------------------------------------------------
  40
  41	Algorithm skeleton was taken from NS simulator cbq.cc.
  42	If someone wants to check this code against the LBL version,
  43	he should take into account that ONLY the skeleton was borrowed,
  44	the implementation is different. Particularly:
  45
  46	--- The WRR algorithm is different. Our version looks more
  47	reasonable (I hope) and works when quanta are allowed to be
  48	less than MTU, which is always the case when real time classes
  49	have small rates. Note, that the statement of [3] is
  50	incomplete, delay may actually be estimated even if class
  51	per-round allotment is less than MTU. Namely, if per-round
  52	allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56	In the worst case we have IntServ estimate with D = W*r+k*MTU
  57	and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61	interpret some places, which look like wrong translations
  62	from NS. Anyone is advised to find these differences
  63	and explain to me, why I am wrong 8).
  64
  65	--- Linux has no EOI event, so that we cannot estimate true class
  66	idle time. Workaround is to consider the next dequeue event
  67	as sign that previous packet is finished. This is wrong because of
  68	internal device queueing, but on a permanently loaded link it is true.
  69	Moreover, combined with clock integrator, this scheme looks
  70	very close to an ideal solution.  */
  71
  72struct cbq_sched_data;
  73
  74
  75struct cbq_class {
  76	struct Qdisc_class_common common;
  77	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
  78
  79/* Parameters */
  80	unsigned char		priority;	/* class priority */
  81	unsigned char		priority2;	/* priority to be used after overlimit */
  82	unsigned char		ewma_log;	/* time constant for idle time calculation */
  83	unsigned char		ovl_strategy;
  84#ifdef CONFIG_NET_CLS_ACT
  85	unsigned char		police;
  86#endif
  87
  88	u32			defmap;
  89
  90	/* Link-sharing scheduler parameters */
  91	long			maxidle;	/* Class parameters: see below. */
  92	long			offtime;
  93	long			minidle;
  94	u32			avpkt;
  95	struct qdisc_rate_table	*R_tab;
  96
  97	/* Overlimit strategy parameters */
  98	void			(*overlimit)(struct cbq_class *cl);
  99	psched_tdiff_t		penalty;
 100
 101	/* General scheduler (WRR) parameters */
 102	long			allot;
 103	long			quantum;	/* Allotment per WRR round */
 104	long			weight;		/* Relative allotment: see below */
 105
 106	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
 107	struct cbq_class	*split;		/* Ptr to split node */
 108	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
 109	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
 110	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
 111						   parent otherwise */
 112	struct cbq_class	*sibling;	/* Sibling chain */
 113	struct cbq_class	*children;	/* Pointer to children chain */
 114
 115	struct Qdisc		*q;		/* Elementary queueing discipline */
 116
 117
 118/* Variables */
 119	unsigned char		cpriority;	/* Effective priority */
 120	unsigned char		delayed;
 121	unsigned char		level;		/* level of the class in hierarchy:
 122						   0 for leaf classes, and maximal
 123						   level of children + 1 for nodes.
 124						 */
 125
 126	psched_time_t		last;		/* Last end of service */
 127	psched_time_t		undertime;
 128	long			avgidle;
 129	long			deficit;	/* Saved deficit for WRR */
 130	psched_time_t		penalized;
 131	struct gnet_stats_basic_packed bstats;
 132	struct gnet_stats_queue qstats;
 133	struct gnet_stats_rate_est64 rate_est;
 134	struct tc_cbq_xstats	xstats;
 135
 136	struct tcf_proto	*filter_list;
 137
 138	int			refcnt;
 139	int			filters;
 140
 141	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
 142};
 143
 144struct cbq_sched_data {
 145	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
 146	int			nclasses[TC_CBQ_MAXPRIO + 1];
 147	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
 148
 149	struct cbq_class	link;
 150
 151	unsigned int		activemask;
 152	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
 153								   with backlog */
 154
 155#ifdef CONFIG_NET_CLS_ACT
 156	struct cbq_class	*rx_class;
 157#endif
 158	struct cbq_class	*tx_class;
 159	struct cbq_class	*tx_borrowed;
 160	int			tx_len;
 161	psched_time_t		now;		/* Cached timestamp */
 162	psched_time_t		now_rt;		/* Cached real time */
 163	unsigned int		pmask;
 164
 165	struct hrtimer		delay_timer;
 166	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 167						   started when CBQ has
 168						   backlog, but cannot
 169						   transmit just now */
 170	psched_tdiff_t		wd_expires;
 171	int			toplevel;
 172	u32			hgenerator;
 173};
 174
 175
 176#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
 177
 178static inline struct cbq_class *
 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 180{
 181	struct Qdisc_class_common *clc;
 182
 183	clc = qdisc_class_find(&q->clhash, classid);
 184	if (clc == NULL)
 185		return NULL;
 186	return container_of(clc, struct cbq_class, common);
 187}
 188
 189#ifdef CONFIG_NET_CLS_ACT
 190
 191static struct cbq_class *
 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 193{
 194	struct cbq_class *cl;
 195
 196	for (cl = this->tparent; cl; cl = cl->tparent) {
 197		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 198
 199		if (new != NULL && new != this)
 200			return new;
 201	}
 202	return NULL;
 203}
 204
 205#endif
 206
 207/* Classify packet. The procedure is pretty complicated, but
 208 * it allows us to combine link sharing and priority scheduling
 209 * transparently.
 210 *
 211 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212 * so that it resolves to split nodes. Then packets are classified
 213 * by logical priority, or a more specific classifier may be attached
 214 * to the split node.
 215 */
 216
 217static struct cbq_class *
 218cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219{
 220	struct cbq_sched_data *q = qdisc_priv(sch);
 221	struct cbq_class *head = &q->link;
 222	struct cbq_class **defmap;
 223	struct cbq_class *cl = NULL;
 224	u32 prio = skb->priority;
 225	struct tcf_result res;
 226
 227	/*
 228	 *  Step 1. If skb->priority points to one of our classes, use it.
 229	 */
 230	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 231	    (cl = cbq_class_lookup(q, prio)) != NULL)
 232		return cl;
 233
 234	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235	for (;;) {
 236		int result = 0;
 237		defmap = head->defaults;
 238
 239		/*
 240		 * Step 2+n. Apply classifier.
 241		 */
 242		if (!head->filter_list ||
 243		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244			goto fallback;
 245
 246		cl = (void *)res.class;
 247		if (!cl) {
 248			if (TC_H_MAJ(res.classid))
 249				cl = cbq_class_lookup(q, res.classid);
 250			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 251				cl = defmap[TC_PRIO_BESTEFFORT];
 252
 253			if (cl == NULL)
 254				goto fallback;
 255		}
 256		if (cl->level >= head->level)
 257			goto fallback;
 258#ifdef CONFIG_NET_CLS_ACT
 259		switch (result) {
 260		case TC_ACT_QUEUED:
 261		case TC_ACT_STOLEN:
 262			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 263		case TC_ACT_SHOT:
 264			return NULL;
 265		case TC_ACT_RECLASSIFY:
 266			return cbq_reclassify(skb, cl);
 267		}
 268#endif
 269		if (cl->level == 0)
 270			return cl;
 271
 272		/*
 273		 * Step 3+n. If classifier selected a link sharing class,
 274		 *	   apply agency specific classifier.
 275		 *	   Repeat this procdure until we hit a leaf node.
 276		 */
 277		head = cl;
 278	}
 279
 280fallback:
 281	cl = head;
 282
 283	/*
 284	 * Step 4. No success...
 285	 */
 286	if (TC_H_MAJ(prio) == 0 &&
 287	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 288	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 289		return head;
 290
 291	return cl;
 292}
 293
 294/*
 295 * A packet has just been enqueued on the empty class.
 296 * cbq_activate_class adds it to the tail of active class list
 297 * of its priority band.
 298 */
 299
 300static inline void cbq_activate_class(struct cbq_class *cl)
 301{
 302	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 303	int prio = cl->cpriority;
 304	struct cbq_class *cl_tail;
 305
 306	cl_tail = q->active[prio];
 307	q->active[prio] = cl;
 308
 309	if (cl_tail != NULL) {
 310		cl->next_alive = cl_tail->next_alive;
 311		cl_tail->next_alive = cl;
 312	} else {
 313		cl->next_alive = cl;
 314		q->activemask |= (1<<prio);
 315	}
 316}
 317
 318/*
 319 * Unlink class from active chain.
 320 * Note that this same procedure is done directly in cbq_dequeue*
 321 * during round-robin procedure.
 322 */
 323
 324static void cbq_deactivate_class(struct cbq_class *this)
 325{
 326	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 327	int prio = this->cpriority;
 328	struct cbq_class *cl;
 329	struct cbq_class *cl_prev = q->active[prio];
 330
 331	do {
 332		cl = cl_prev->next_alive;
 333		if (cl == this) {
 334			cl_prev->next_alive = cl->next_alive;
 335			cl->next_alive = NULL;
 336
 337			if (cl == q->active[prio]) {
 338				q->active[prio] = cl_prev;
 339				if (cl == q->active[prio]) {
 340					q->active[prio] = NULL;
 341					q->activemask &= ~(1<<prio);
 342					return;
 343				}
 344			}
 345			return;
 346		}
 347	} while ((cl_prev = cl) != q->active[prio]);
 348}
 349
 350static void
 351cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 352{
 353	int toplevel = q->toplevel;
 354
 355	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 356		psched_time_t now;
 357		psched_tdiff_t incr;
 358
 359		now = psched_get_time();
 360		incr = now - q->now_rt;
 361		now = q->now + incr;
 362
 363		do {
 364			if (cl->undertime < now) {
 365				q->toplevel = cl->level;
 366				return;
 367			}
 368		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 369	}
 370}
 371
 372static int
 373cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 374{
 375	struct cbq_sched_data *q = qdisc_priv(sch);
 376	int uninitialized_var(ret);
 377	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 378
 379#ifdef CONFIG_NET_CLS_ACT
 380	q->rx_class = cl;
 381#endif
 382	if (cl == NULL) {
 383		if (ret & __NET_XMIT_BYPASS)
 384			sch->qstats.drops++;
 385		kfree_skb(skb);
 386		return ret;
 387	}
 388
 389#ifdef CONFIG_NET_CLS_ACT
 390	cl->q->__parent = sch;
 391#endif
 392	ret = qdisc_enqueue(skb, cl->q);
 393	if (ret == NET_XMIT_SUCCESS) {
 394		sch->q.qlen++;
 395		cbq_mark_toplevel(q, cl);
 396		if (!cl->next_alive)
 397			cbq_activate_class(cl);
 398		return ret;
 399	}
 400
 401	if (net_xmit_drop_count(ret)) {
 402		sch->qstats.drops++;
 403		cbq_mark_toplevel(q, cl);
 404		cl->qstats.drops++;
 405	}
 406	return ret;
 407}
 408
 409/* Overlimit actions */
 410
 411/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 412
 413static void cbq_ovl_classic(struct cbq_class *cl)
 414{
 415	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 416	psched_tdiff_t delay = cl->undertime - q->now;
 417
 418	if (!cl->delayed) {
 419		delay += cl->offtime;
 420
 421		/*
 422		 * Class goes to sleep, so that it will have no
 423		 * chance to work avgidle. Let's forgive it 8)
 424		 *
 425		 * BTW cbq-2.0 has a crap in this
 426		 * place, apparently they forgot to shift it by cl->ewma_log.
 427		 */
 428		if (cl->avgidle < 0)
 429			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 430		if (cl->avgidle < cl->minidle)
 431			cl->avgidle = cl->minidle;
 432		if (delay <= 0)
 433			delay = 1;
 434		cl->undertime = q->now + delay;
 435
 436		cl->xstats.overactions++;
 437		cl->delayed = 1;
 438	}
 439	if (q->wd_expires == 0 || q->wd_expires > delay)
 440		q->wd_expires = delay;
 441
 442	/* Dirty work! We must schedule wakeups based on
 443	 * real available rate, rather than leaf rate,
 444	 * which may be tiny (even zero).
 445	 */
 446	if (q->toplevel == TC_CBQ_MAXLEVEL) {
 447		struct cbq_class *b;
 448		psched_tdiff_t base_delay = q->wd_expires;
 449
 450		for (b = cl->borrow; b; b = b->borrow) {
 451			delay = b->undertime - q->now;
 452			if (delay < base_delay) {
 453				if (delay <= 0)
 454					delay = 1;
 455				base_delay = delay;
 456			}
 457		}
 458
 459		q->wd_expires = base_delay;
 460	}
 461}
 462
 463/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 464 * they go overlimit
 465 */
 466
 467static void cbq_ovl_rclassic(struct cbq_class *cl)
 468{
 469	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 470	struct cbq_class *this = cl;
 471
 472	do {
 473		if (cl->level > q->toplevel) {
 474			cl = NULL;
 475			break;
 476		}
 477	} while ((cl = cl->borrow) != NULL);
 478
 479	if (cl == NULL)
 480		cl = this;
 481	cbq_ovl_classic(cl);
 482}
 483
 484/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 485
 486static void cbq_ovl_delay(struct cbq_class *cl)
 487{
 488	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 489	psched_tdiff_t delay = cl->undertime - q->now;
 490
 491	if (test_bit(__QDISC_STATE_DEACTIVATED,
 492		     &qdisc_root_sleeping(cl->qdisc)->state))
 493		return;
 494
 495	if (!cl->delayed) {
 496		psched_time_t sched = q->now;
 497		ktime_t expires;
 498
 499		delay += cl->offtime;
 500		if (cl->avgidle < 0)
 501			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 502		if (cl->avgidle < cl->minidle)
 503			cl->avgidle = cl->minidle;
 504		cl->undertime = q->now + delay;
 505
 506		if (delay > 0) {
 507			sched += delay + cl->penalty;
 508			cl->penalized = sched;
 509			cl->cpriority = TC_CBQ_MAXPRIO;
 510			q->pmask |= (1<<TC_CBQ_MAXPRIO);
 511
 512			expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
 
 513			if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514			    ktime_to_ns(ktime_sub(
 515					hrtimer_get_expires(&q->delay_timer),
 516					expires)) > 0)
 517				hrtimer_set_expires(&q->delay_timer, expires);
 518			hrtimer_restart(&q->delay_timer);
 519			cl->delayed = 1;
 520			cl->xstats.overactions++;
 521			return;
 522		}
 523		delay = 1;
 524	}
 525	if (q->wd_expires == 0 || q->wd_expires > delay)
 526		q->wd_expires = delay;
 527}
 528
 529/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531static void cbq_ovl_lowprio(struct cbq_class *cl)
 532{
 533	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535	cl->penalized = q->now + cl->penalty;
 536
 537	if (cl->cpriority != cl->priority2) {
 538		cl->cpriority = cl->priority2;
 539		q->pmask |= (1<<cl->cpriority);
 540		cl->xstats.overactions++;
 541	}
 542	cbq_ovl_classic(cl);
 543}
 544
 545/* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547static void cbq_ovl_drop(struct cbq_class *cl)
 548{
 549	if (cl->q->ops->drop)
 550		if (cl->q->ops->drop(cl->q))
 551			cl->qdisc->q.qlen--;
 552	cl->xstats.overactions++;
 553	cbq_ovl_classic(cl);
 554}
 555
 556static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557				       psched_time_t now)
 558{
 559	struct cbq_class *cl;
 560	struct cbq_class *cl_prev = q->active[prio];
 561	psched_time_t sched = now;
 562
 563	if (cl_prev == NULL)
 564		return 0;
 565
 566	do {
 567		cl = cl_prev->next_alive;
 568		if (now - cl->penalized > 0) {
 569			cl_prev->next_alive = cl->next_alive;
 570			cl->next_alive = NULL;
 571			cl->cpriority = cl->priority;
 572			cl->delayed = 0;
 573			cbq_activate_class(cl);
 574
 575			if (cl == q->active[prio]) {
 576				q->active[prio] = cl_prev;
 577				if (cl == q->active[prio]) {
 578					q->active[prio] = NULL;
 579					return 0;
 580				}
 581			}
 582
 583			cl = cl_prev->next_alive;
 584		} else if (sched - cl->penalized > 0)
 585			sched = cl->penalized;
 586	} while ((cl_prev = cl) != q->active[prio]);
 587
 588	return sched - now;
 589}
 590
 591static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592{
 593	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594						delay_timer);
 595	struct Qdisc *sch = q->watchdog.qdisc;
 596	psched_time_t now;
 597	psched_tdiff_t delay = 0;
 598	unsigned int pmask;
 599
 600	now = psched_get_time();
 601
 602	pmask = q->pmask;
 603	q->pmask = 0;
 604
 605	while (pmask) {
 606		int prio = ffz(~pmask);
 607		psched_tdiff_t tmp;
 608
 609		pmask &= ~(1<<prio);
 610
 611		tmp = cbq_undelay_prio(q, prio, now);
 612		if (tmp > 0) {
 613			q->pmask |= 1<<prio;
 614			if (tmp < delay || delay == 0)
 615				delay = tmp;
 616		}
 617	}
 618
 619	if (delay) {
 620		ktime_t time;
 621
 622		time = ktime_set(0, 0);
 623		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625	}
 626
 627	qdisc_unthrottled(sch);
 628	__netif_schedule(qdisc_root(sch));
 629	return HRTIMER_NORESTART;
 630}
 631
 632#ifdef CONFIG_NET_CLS_ACT
 633static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634{
 635	struct Qdisc *sch = child->__parent;
 636	struct cbq_sched_data *q = qdisc_priv(sch);
 637	struct cbq_class *cl = q->rx_class;
 638
 639	q->rx_class = NULL;
 640
 641	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642		int ret;
 643
 644		cbq_mark_toplevel(q, cl);
 645
 646		q->rx_class = cl;
 647		cl->q->__parent = sch;
 648
 649		ret = qdisc_enqueue(skb, cl->q);
 650		if (ret == NET_XMIT_SUCCESS) {
 651			sch->q.qlen++;
 652			if (!cl->next_alive)
 653				cbq_activate_class(cl);
 654			return 0;
 655		}
 656		if (net_xmit_drop_count(ret))
 657			sch->qstats.drops++;
 658		return 0;
 659	}
 660
 661	sch->qstats.drops++;
 662	return -1;
 663}
 664#endif
 665
 666/*
 667 * It is mission critical procedure.
 668 *
 669 * We "regenerate" toplevel cutoff, if transmitting class
 670 * has backlog and it is not regulated. It is not part of
 671 * original CBQ description, but looks more reasonable.
 672 * Probably, it is wrong. This question needs further investigation.
 673 */
 674
 675static inline void
 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 677		    struct cbq_class *borrowed)
 678{
 679	if (cl && q->toplevel >= borrowed->level) {
 680		if (cl->q->q.qlen > 1) {
 681			do {
 682				if (borrowed->undertime == PSCHED_PASTPERFECT) {
 683					q->toplevel = borrowed->level;
 684					return;
 685				}
 686			} while ((borrowed = borrowed->borrow) != NULL);
 687		}
 688#if 0
 689	/* It is not necessary now. Uncommenting it
 690	   will save CPU cycles, but decrease fairness.
 691	 */
 692		q->toplevel = TC_CBQ_MAXLEVEL;
 693#endif
 694	}
 695}
 696
 697static void
 698cbq_update(struct cbq_sched_data *q)
 699{
 700	struct cbq_class *this = q->tx_class;
 701	struct cbq_class *cl = this;
 702	int len = q->tx_len;
 703
 704	q->tx_class = NULL;
 705
 706	for ( ; cl; cl = cl->share) {
 707		long avgidle = cl->avgidle;
 708		long idle;
 709
 710		cl->bstats.packets++;
 711		cl->bstats.bytes += len;
 712
 713		/*
 714		 * (now - last) is total time between packet right edges.
 715		 * (last_pktlen/rate) is "virtual" busy time, so that
 716		 *
 717		 *	idle = (now - last) - last_pktlen/rate
 718		 */
 719
 720		idle = q->now - cl->last;
 721		if ((unsigned long)idle > 128*1024*1024) {
 722			avgidle = cl->maxidle;
 723		} else {
 724			idle -= L2T(cl, len);
 725
 726		/* true_avgidle := (1-W)*true_avgidle + W*idle,
 727		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
 728		 * cl->avgidle == true_avgidle/W,
 729		 * hence:
 730		 */
 731			avgidle += idle - (avgidle>>cl->ewma_log);
 732		}
 733
 734		if (avgidle <= 0) {
 735			/* Overlimit or at-limit */
 736
 737			if (avgidle < cl->minidle)
 738				avgidle = cl->minidle;
 739
 740			cl->avgidle = avgidle;
 741
 742			/* Calculate expected time, when this class
 743			 * will be allowed to send.
 744			 * It will occur, when:
 745			 * (1-W)*true_avgidle + W*delay = 0, i.e.
 746			 * idle = (1/W - 1)*(-true_avgidle)
 747			 * or
 748			 * idle = (1 - W)*(-cl->avgidle);
 749			 */
 750			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 751
 752			/*
 753			 * That is not all.
 754			 * To maintain the rate allocated to the class,
 755			 * we add to undertime virtual clock,
 756			 * necessary to complete transmitted packet.
 757			 * (len/phys_bandwidth has been already passed
 758			 * to the moment of cbq_update)
 759			 */
 760
 761			idle -= L2T(&q->link, len);
 762			idle += L2T(cl, len);
 763
 764			cl->undertime = q->now + idle;
 765		} else {
 766			/* Underlimit */
 767
 768			cl->undertime = PSCHED_PASTPERFECT;
 769			if (avgidle > cl->maxidle)
 770				cl->avgidle = cl->maxidle;
 771			else
 772				cl->avgidle = avgidle;
 773		}
 774		cl->last = q->now;
 775	}
 776
 777	cbq_update_toplevel(q, this, q->tx_borrowed);
 778}
 779
 780static inline struct cbq_class *
 781cbq_under_limit(struct cbq_class *cl)
 782{
 783	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 784	struct cbq_class *this_cl = cl;
 785
 786	if (cl->tparent == NULL)
 787		return cl;
 788
 789	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 790		cl->delayed = 0;
 791		return cl;
 792	}
 793
 794	do {
 795		/* It is very suspicious place. Now overlimit
 796		 * action is generated for not bounded classes
 797		 * only if link is completely congested.
 798		 * Though it is in agree with ancestor-only paradigm,
 799		 * it looks very stupid. Particularly,
 800		 * it means that this chunk of code will either
 801		 * never be called or result in strong amplification
 802		 * of burstiness. Dangerous, silly, and, however,
 803		 * no another solution exists.
 804		 */
 805		cl = cl->borrow;
 806		if (!cl) {
 807			this_cl->qstats.overlimits++;
 808			this_cl->overlimit(this_cl);
 809			return NULL;
 810		}
 811		if (cl->level > q->toplevel)
 812			return NULL;
 813	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 814
 815	cl->delayed = 0;
 816	return cl;
 817}
 818
 819static inline struct sk_buff *
 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
 821{
 822	struct cbq_sched_data *q = qdisc_priv(sch);
 823	struct cbq_class *cl_tail, *cl_prev, *cl;
 824	struct sk_buff *skb;
 825	int deficit;
 826
 827	cl_tail = cl_prev = q->active[prio];
 828	cl = cl_prev->next_alive;
 829
 830	do {
 831		deficit = 0;
 832
 833		/* Start round */
 834		do {
 835			struct cbq_class *borrow = cl;
 836
 837			if (cl->q->q.qlen &&
 838			    (borrow = cbq_under_limit(cl)) == NULL)
 839				goto skip_class;
 840
 841			if (cl->deficit <= 0) {
 842				/* Class exhausted its allotment per
 843				 * this round. Switch to the next one.
 844				 */
 845				deficit = 1;
 846				cl->deficit += cl->quantum;
 847				goto next_class;
 848			}
 849
 850			skb = cl->q->dequeue(cl->q);
 851
 852			/* Class did not give us any skb :-(
 853			 * It could occur even if cl->q->q.qlen != 0
 854			 * f.e. if cl->q == "tbf"
 855			 */
 856			if (skb == NULL)
 857				goto skip_class;
 858
 859			cl->deficit -= qdisc_pkt_len(skb);
 860			q->tx_class = cl;
 861			q->tx_borrowed = borrow;
 862			if (borrow != cl) {
 863#ifndef CBQ_XSTATS_BORROWS_BYTES
 864				borrow->xstats.borrows++;
 865				cl->xstats.borrows++;
 866#else
 867				borrow->xstats.borrows += qdisc_pkt_len(skb);
 868				cl->xstats.borrows += qdisc_pkt_len(skb);
 869#endif
 870			}
 871			q->tx_len = qdisc_pkt_len(skb);
 872
 873			if (cl->deficit <= 0) {
 874				q->active[prio] = cl;
 875				cl = cl->next_alive;
 876				cl->deficit += cl->quantum;
 877			}
 878			return skb;
 879
 880skip_class:
 881			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 882				/* Class is empty or penalized.
 883				 * Unlink it from active chain.
 884				 */
 885				cl_prev->next_alive = cl->next_alive;
 886				cl->next_alive = NULL;
 887
 888				/* Did cl_tail point to it? */
 889				if (cl == cl_tail) {
 890					/* Repair it! */
 891					cl_tail = cl_prev;
 892
 893					/* Was it the last class in this band? */
 894					if (cl == cl_tail) {
 895						/* Kill the band! */
 896						q->active[prio] = NULL;
 897						q->activemask &= ~(1<<prio);
 898						if (cl->q->q.qlen)
 899							cbq_activate_class(cl);
 900						return NULL;
 901					}
 902
 903					q->active[prio] = cl_tail;
 904				}
 905				if (cl->q->q.qlen)
 906					cbq_activate_class(cl);
 907
 908				cl = cl_prev;
 909			}
 910
 911next_class:
 912			cl_prev = cl;
 913			cl = cl->next_alive;
 914		} while (cl_prev != cl_tail);
 915	} while (deficit);
 916
 917	q->active[prio] = cl_prev;
 918
 919	return NULL;
 920}
 921
 922static inline struct sk_buff *
 923cbq_dequeue_1(struct Qdisc *sch)
 924{
 925	struct cbq_sched_data *q = qdisc_priv(sch);
 926	struct sk_buff *skb;
 927	unsigned int activemask;
 928
 929	activemask = q->activemask & 0xFF;
 930	while (activemask) {
 931		int prio = ffz(~activemask);
 932		activemask &= ~(1<<prio);
 933		skb = cbq_dequeue_prio(sch, prio);
 934		if (skb)
 935			return skb;
 936	}
 937	return NULL;
 938}
 939
 940static struct sk_buff *
 941cbq_dequeue(struct Qdisc *sch)
 942{
 943	struct sk_buff *skb;
 944	struct cbq_sched_data *q = qdisc_priv(sch);
 945	psched_time_t now;
 946	psched_tdiff_t incr;
 947
 948	now = psched_get_time();
 949	incr = now - q->now_rt;
 950
 951	if (q->tx_class) {
 952		psched_tdiff_t incr2;
 953		/* Time integrator. We calculate EOS time
 954		 * by adding expected packet transmission time.
 955		 * If real time is greater, we warp artificial clock,
 956		 * so that:
 957		 *
 958		 * cbq_time = max(real_time, work);
 959		 */
 960		incr2 = L2T(&q->link, q->tx_len);
 961		q->now += incr2;
 962		cbq_update(q);
 963		if ((incr -= incr2) < 0)
 964			incr = 0;
 965		q->now += incr;
 966	} else {
 967		if (now > q->now)
 968			q->now = now;
 969	}
 
 970	q->now_rt = now;
 971
 972	for (;;) {
 973		q->wd_expires = 0;
 974
 975		skb = cbq_dequeue_1(sch);
 976		if (skb) {
 977			qdisc_bstats_update(sch, skb);
 978			sch->q.qlen--;
 979			qdisc_unthrottled(sch);
 980			return skb;
 981		}
 982
 983		/* All the classes are overlimit.
 984		 *
 985		 * It is possible, if:
 986		 *
 987		 * 1. Scheduler is empty.
 988		 * 2. Toplevel cutoff inhibited borrowing.
 989		 * 3. Root class is overlimit.
 990		 *
 991		 * Reset 2d and 3d conditions and retry.
 992		 *
 993		 * Note, that NS and cbq-2.0 are buggy, peeking
 994		 * an arbitrary class is appropriate for ancestor-only
 995		 * sharing, but not for toplevel algorithm.
 996		 *
 997		 * Our version is better, but slower, because it requires
 998		 * two passes, but it is unavoidable with top-level sharing.
 999		 */
1000
1001		if (q->toplevel == TC_CBQ_MAXLEVEL &&
1002		    q->link.undertime == PSCHED_PASTPERFECT)
1003			break;
1004
1005		q->toplevel = TC_CBQ_MAXLEVEL;
1006		q->link.undertime = PSCHED_PASTPERFECT;
1007	}
1008
1009	/* No packets in scheduler or nobody wants to give them to us :-(
1010	 * Sigh... start watchdog timer in the last case.
1011	 */
1012
1013	if (sch->q.qlen) {
1014		sch->qstats.overlimits++;
1015		if (q->wd_expires)
1016			qdisc_watchdog_schedule(&q->watchdog,
1017						now + q->wd_expires);
1018	}
1019	return NULL;
1020}
1021
1022/* CBQ class maintanance routines */
1023
1024static void cbq_adjust_levels(struct cbq_class *this)
1025{
1026	if (this == NULL)
1027		return;
1028
1029	do {
1030		int level = 0;
1031		struct cbq_class *cl;
1032
1033		cl = this->children;
1034		if (cl) {
1035			do {
1036				if (cl->level > level)
1037					level = cl->level;
1038			} while ((cl = cl->sibling) != this->children);
1039		}
1040		this->level = level + 1;
1041	} while ((this = this->tparent) != NULL);
1042}
1043
1044static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1045{
1046	struct cbq_class *cl;
 
1047	unsigned int h;
1048
1049	if (q->quanta[prio] == 0)
1050		return;
1051
1052	for (h = 0; h < q->clhash.hashsize; h++) {
1053		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1054			/* BUGGGG... Beware! This expression suffer of
1055			 * arithmetic overflows!
1056			 */
1057			if (cl->priority == prio) {
1058				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1059					q->quanta[prio];
1060			}
1061			if (cl->quantum <= 0 ||
1062			    cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
1063				pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1064					cl->common.classid, cl->quantum);
1065				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1066			}
1067		}
1068	}
1069}
1070
1071static void cbq_sync_defmap(struct cbq_class *cl)
1072{
1073	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1074	struct cbq_class *split = cl->split;
1075	unsigned int h;
1076	int i;
1077
1078	if (split == NULL)
1079		return;
1080
1081	for (i = 0; i <= TC_PRIO_MAX; i++) {
1082		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1083			split->defaults[i] = NULL;
1084	}
1085
1086	for (i = 0; i <= TC_PRIO_MAX; i++) {
1087		int level = split->level;
1088
1089		if (split->defaults[i])
1090			continue;
1091
1092		for (h = 0; h < q->clhash.hashsize; h++) {
 
1093			struct cbq_class *c;
1094
1095			hlist_for_each_entry(c, &q->clhash.hash[h],
1096					     common.hnode) {
1097				if (c->split == split && c->level < level &&
1098				    c->defmap & (1<<i)) {
1099					split->defaults[i] = c;
1100					level = c->level;
1101				}
1102			}
1103		}
1104	}
1105}
1106
1107static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1108{
1109	struct cbq_class *split = NULL;
1110
1111	if (splitid == 0) {
1112		split = cl->split;
1113		if (!split)
1114			return;
1115		splitid = split->common.classid;
1116	}
1117
1118	if (split == NULL || split->common.classid != splitid) {
1119		for (split = cl->tparent; split; split = split->tparent)
1120			if (split->common.classid == splitid)
1121				break;
1122	}
1123
1124	if (split == NULL)
1125		return;
1126
1127	if (cl->split != split) {
1128		cl->defmap = 0;
1129		cbq_sync_defmap(cl);
1130		cl->split = split;
1131		cl->defmap = def & mask;
1132	} else
1133		cl->defmap = (cl->defmap & ~mask) | (def & mask);
1134
1135	cbq_sync_defmap(cl);
1136}
1137
1138static void cbq_unlink_class(struct cbq_class *this)
1139{
1140	struct cbq_class *cl, **clp;
1141	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1142
1143	qdisc_class_hash_remove(&q->clhash, &this->common);
1144
1145	if (this->tparent) {
1146		clp = &this->sibling;
1147		cl = *clp;
1148		do {
1149			if (cl == this) {
1150				*clp = cl->sibling;
1151				break;
1152			}
1153			clp = &cl->sibling;
1154		} while ((cl = *clp) != this->sibling);
1155
1156		if (this->tparent->children == this) {
1157			this->tparent->children = this->sibling;
1158			if (this->sibling == this)
1159				this->tparent->children = NULL;
1160		}
1161	} else {
1162		WARN_ON(this->sibling != this);
1163	}
1164}
1165
1166static void cbq_link_class(struct cbq_class *this)
1167{
1168	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1169	struct cbq_class *parent = this->tparent;
1170
1171	this->sibling = this;
1172	qdisc_class_hash_insert(&q->clhash, &this->common);
1173
1174	if (parent == NULL)
1175		return;
1176
1177	if (parent->children == NULL) {
1178		parent->children = this;
1179	} else {
1180		this->sibling = parent->children->sibling;
1181		parent->children->sibling = this;
1182	}
1183}
1184
1185static unsigned int cbq_drop(struct Qdisc *sch)
1186{
1187	struct cbq_sched_data *q = qdisc_priv(sch);
1188	struct cbq_class *cl, *cl_head;
1189	int prio;
1190	unsigned int len;
1191
1192	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1193		cl_head = q->active[prio];
1194		if (!cl_head)
1195			continue;
1196
1197		cl = cl_head;
1198		do {
1199			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1200				sch->q.qlen--;
1201				if (!cl->q->q.qlen)
1202					cbq_deactivate_class(cl);
1203				return len;
1204			}
1205		} while ((cl = cl->next_alive) != cl_head);
1206	}
1207	return 0;
1208}
1209
1210static void
1211cbq_reset(struct Qdisc *sch)
1212{
1213	struct cbq_sched_data *q = qdisc_priv(sch);
1214	struct cbq_class *cl;
 
1215	int prio;
1216	unsigned int h;
1217
1218	q->activemask = 0;
1219	q->pmask = 0;
1220	q->tx_class = NULL;
1221	q->tx_borrowed = NULL;
1222	qdisc_watchdog_cancel(&q->watchdog);
1223	hrtimer_cancel(&q->delay_timer);
1224	q->toplevel = TC_CBQ_MAXLEVEL;
1225	q->now = psched_get_time();
1226	q->now_rt = q->now;
1227
1228	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1229		q->active[prio] = NULL;
1230
1231	for (h = 0; h < q->clhash.hashsize; h++) {
1232		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1233			qdisc_reset(cl->q);
1234
1235			cl->next_alive = NULL;
1236			cl->undertime = PSCHED_PASTPERFECT;
1237			cl->avgidle = cl->maxidle;
1238			cl->deficit = cl->quantum;
1239			cl->cpriority = cl->priority;
1240		}
1241	}
1242	sch->q.qlen = 0;
1243}
1244
1245
1246static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1247{
1248	if (lss->change & TCF_CBQ_LSS_FLAGS) {
1249		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1250		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1251	}
1252	if (lss->change & TCF_CBQ_LSS_EWMA)
1253		cl->ewma_log = lss->ewma_log;
1254	if (lss->change & TCF_CBQ_LSS_AVPKT)
1255		cl->avpkt = lss->avpkt;
1256	if (lss->change & TCF_CBQ_LSS_MINIDLE)
1257		cl->minidle = -(long)lss->minidle;
1258	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1259		cl->maxidle = lss->maxidle;
1260		cl->avgidle = lss->maxidle;
1261	}
1262	if (lss->change & TCF_CBQ_LSS_OFFTIME)
1263		cl->offtime = lss->offtime;
1264	return 0;
1265}
1266
1267static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1268{
1269	q->nclasses[cl->priority]--;
1270	q->quanta[cl->priority] -= cl->weight;
1271	cbq_normalize_quanta(q, cl->priority);
1272}
1273
1274static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1275{
1276	q->nclasses[cl->priority]++;
1277	q->quanta[cl->priority] += cl->weight;
1278	cbq_normalize_quanta(q, cl->priority);
1279}
1280
1281static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1282{
1283	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1284
1285	if (wrr->allot)
1286		cl->allot = wrr->allot;
1287	if (wrr->weight)
1288		cl->weight = wrr->weight;
1289	if (wrr->priority) {
1290		cl->priority = wrr->priority - 1;
1291		cl->cpriority = cl->priority;
1292		if (cl->priority >= cl->priority2)
1293			cl->priority2 = TC_CBQ_MAXPRIO - 1;
1294	}
1295
1296	cbq_addprio(q, cl);
1297	return 0;
1298}
1299
1300static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1301{
1302	switch (ovl->strategy) {
1303	case TC_CBQ_OVL_CLASSIC:
1304		cl->overlimit = cbq_ovl_classic;
1305		break;
1306	case TC_CBQ_OVL_DELAY:
1307		cl->overlimit = cbq_ovl_delay;
1308		break;
1309	case TC_CBQ_OVL_LOWPRIO:
1310		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1311		    ovl->priority2 - 1 <= cl->priority)
1312			return -EINVAL;
1313		cl->priority2 = ovl->priority2 - 1;
1314		cl->overlimit = cbq_ovl_lowprio;
1315		break;
1316	case TC_CBQ_OVL_DROP:
1317		cl->overlimit = cbq_ovl_drop;
1318		break;
1319	case TC_CBQ_OVL_RCLASSIC:
1320		cl->overlimit = cbq_ovl_rclassic;
1321		break;
1322	default:
1323		return -EINVAL;
1324	}
1325	cl->penalty = ovl->penalty;
1326	return 0;
1327}
1328
1329#ifdef CONFIG_NET_CLS_ACT
1330static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1331{
1332	cl->police = p->police;
1333
1334	if (cl->q->handle) {
1335		if (p->police == TC_POLICE_RECLASSIFY)
1336			cl->q->reshape_fail = cbq_reshape_fail;
1337		else
1338			cl->q->reshape_fail = NULL;
1339	}
1340	return 0;
1341}
1342#endif
1343
1344static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1345{
1346	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1347	return 0;
1348}
1349
1350static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1351	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
1352	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
1353	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
1354	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
1355	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
1356	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1357	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
1358};
1359
1360static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1361{
1362	struct cbq_sched_data *q = qdisc_priv(sch);
1363	struct nlattr *tb[TCA_CBQ_MAX + 1];
1364	struct tc_ratespec *r;
1365	int err;
1366
1367	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1368	if (err < 0)
1369		return err;
1370
1371	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1372		return -EINVAL;
1373
1374	r = nla_data(tb[TCA_CBQ_RATE]);
1375
1376	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1377		return -EINVAL;
1378
1379	err = qdisc_class_hash_init(&q->clhash);
1380	if (err < 0)
1381		goto put_rtab;
1382
1383	q->link.refcnt = 1;
1384	q->link.sibling = &q->link;
1385	q->link.common.classid = sch->handle;
1386	q->link.qdisc = sch;
1387	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1388				      sch->handle);
1389	if (!q->link.q)
1390		q->link.q = &noop_qdisc;
1391
1392	q->link.priority = TC_CBQ_MAXPRIO - 1;
1393	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1394	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1395	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1396	q->link.overlimit = cbq_ovl_classic;
1397	q->link.allot = psched_mtu(qdisc_dev(sch));
1398	q->link.quantum = q->link.allot;
1399	q->link.weight = q->link.R_tab->rate.rate;
1400
1401	q->link.ewma_log = TC_CBQ_DEF_EWMA;
1402	q->link.avpkt = q->link.allot/2;
1403	q->link.minidle = -0x7FFFFFFF;
1404
1405	qdisc_watchdog_init(&q->watchdog, sch);
1406	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1407	q->delay_timer.function = cbq_undelay;
1408	q->toplevel = TC_CBQ_MAXLEVEL;
1409	q->now = psched_get_time();
1410	q->now_rt = q->now;
1411
1412	cbq_link_class(&q->link);
1413
1414	if (tb[TCA_CBQ_LSSOPT])
1415		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1416
1417	cbq_addprio(q, &q->link);
1418	return 0;
1419
1420put_rtab:
1421	qdisc_put_rtab(q->link.R_tab);
1422	return err;
1423}
1424
1425static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1426{
1427	unsigned char *b = skb_tail_pointer(skb);
1428
1429	if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1430		goto nla_put_failure;
1431	return skb->len;
1432
1433nla_put_failure:
1434	nlmsg_trim(skb, b);
1435	return -1;
1436}
1437
1438static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1439{
1440	unsigned char *b = skb_tail_pointer(skb);
1441	struct tc_cbq_lssopt opt;
1442
1443	opt.flags = 0;
1444	if (cl->borrow == NULL)
1445		opt.flags |= TCF_CBQ_LSS_BOUNDED;
1446	if (cl->share == NULL)
1447		opt.flags |= TCF_CBQ_LSS_ISOLATED;
1448	opt.ewma_log = cl->ewma_log;
1449	opt.level = cl->level;
1450	opt.avpkt = cl->avpkt;
1451	opt.maxidle = cl->maxidle;
1452	opt.minidle = (u32)(-cl->minidle);
1453	opt.offtime = cl->offtime;
1454	opt.change = ~0;
1455	if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1456		goto nla_put_failure;
1457	return skb->len;
1458
1459nla_put_failure:
1460	nlmsg_trim(skb, b);
1461	return -1;
1462}
1463
1464static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1465{
1466	unsigned char *b = skb_tail_pointer(skb);
1467	struct tc_cbq_wrropt opt;
1468
1469	memset(&opt, 0, sizeof(opt));
1470	opt.flags = 0;
1471	opt.allot = cl->allot;
1472	opt.priority = cl->priority + 1;
1473	opt.cpriority = cl->cpriority + 1;
1474	opt.weight = cl->weight;
1475	if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1476		goto nla_put_failure;
1477	return skb->len;
1478
1479nla_put_failure:
1480	nlmsg_trim(skb, b);
1481	return -1;
1482}
1483
1484static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1485{
1486	unsigned char *b = skb_tail_pointer(skb);
1487	struct tc_cbq_ovl opt;
1488
1489	opt.strategy = cl->ovl_strategy;
1490	opt.priority2 = cl->priority2 + 1;
1491	opt.pad = 0;
1492	opt.penalty = cl->penalty;
1493	if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1494		goto nla_put_failure;
1495	return skb->len;
1496
1497nla_put_failure:
1498	nlmsg_trim(skb, b);
1499	return -1;
1500}
1501
1502static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1503{
1504	unsigned char *b = skb_tail_pointer(skb);
1505	struct tc_cbq_fopt opt;
1506
1507	if (cl->split || cl->defmap) {
1508		opt.split = cl->split ? cl->split->common.classid : 0;
1509		opt.defmap = cl->defmap;
1510		opt.defchange = ~0;
1511		if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1512			goto nla_put_failure;
1513	}
1514	return skb->len;
1515
1516nla_put_failure:
1517	nlmsg_trim(skb, b);
1518	return -1;
1519}
1520
1521#ifdef CONFIG_NET_CLS_ACT
1522static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1523{
1524	unsigned char *b = skb_tail_pointer(skb);
1525	struct tc_cbq_police opt;
1526
1527	if (cl->police) {
1528		opt.police = cl->police;
1529		opt.__res1 = 0;
1530		opt.__res2 = 0;
1531		if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1532			goto nla_put_failure;
1533	}
1534	return skb->len;
1535
1536nla_put_failure:
1537	nlmsg_trim(skb, b);
1538	return -1;
1539}
1540#endif
1541
1542static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1543{
1544	if (cbq_dump_lss(skb, cl) < 0 ||
1545	    cbq_dump_rate(skb, cl) < 0 ||
1546	    cbq_dump_wrr(skb, cl) < 0 ||
1547	    cbq_dump_ovl(skb, cl) < 0 ||
1548#ifdef CONFIG_NET_CLS_ACT
1549	    cbq_dump_police(skb, cl) < 0 ||
1550#endif
1551	    cbq_dump_fopt(skb, cl) < 0)
1552		return -1;
1553	return 0;
1554}
1555
1556static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1557{
1558	struct cbq_sched_data *q = qdisc_priv(sch);
1559	struct nlattr *nest;
1560
1561	nest = nla_nest_start(skb, TCA_OPTIONS);
1562	if (nest == NULL)
1563		goto nla_put_failure;
1564	if (cbq_dump_attr(skb, &q->link) < 0)
1565		goto nla_put_failure;
1566	return nla_nest_end(skb, nest);
 
1567
1568nla_put_failure:
1569	nla_nest_cancel(skb, nest);
1570	return -1;
1571}
1572
1573static int
1574cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1575{
1576	struct cbq_sched_data *q = qdisc_priv(sch);
1577
1578	q->link.xstats.avgidle = q->link.avgidle;
1579	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1580}
1581
1582static int
1583cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1584	       struct sk_buff *skb, struct tcmsg *tcm)
1585{
1586	struct cbq_class *cl = (struct cbq_class *)arg;
1587	struct nlattr *nest;
1588
1589	if (cl->tparent)
1590		tcm->tcm_parent = cl->tparent->common.classid;
1591	else
1592		tcm->tcm_parent = TC_H_ROOT;
1593	tcm->tcm_handle = cl->common.classid;
1594	tcm->tcm_info = cl->q->handle;
1595
1596	nest = nla_nest_start(skb, TCA_OPTIONS);
1597	if (nest == NULL)
1598		goto nla_put_failure;
1599	if (cbq_dump_attr(skb, cl) < 0)
1600		goto nla_put_failure;
1601	return nla_nest_end(skb, nest);
 
1602
1603nla_put_failure:
1604	nla_nest_cancel(skb, nest);
1605	return -1;
1606}
1607
1608static int
1609cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1610	struct gnet_dump *d)
1611{
1612	struct cbq_sched_data *q = qdisc_priv(sch);
1613	struct cbq_class *cl = (struct cbq_class *)arg;
1614
1615	cl->qstats.qlen = cl->q->q.qlen;
1616	cl->xstats.avgidle = cl->avgidle;
1617	cl->xstats.undertime = 0;
1618
1619	if (cl->undertime != PSCHED_PASTPERFECT)
1620		cl->xstats.undertime = cl->undertime - q->now;
1621
1622	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1623	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1624	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
1625		return -1;
1626
1627	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1628}
1629
1630static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1631		     struct Qdisc **old)
1632{
1633	struct cbq_class *cl = (struct cbq_class *)arg;
1634
1635	if (new == NULL) {
1636		new = qdisc_create_dflt(sch->dev_queue,
1637					&pfifo_qdisc_ops, cl->common.classid);
1638		if (new == NULL)
1639			return -ENOBUFS;
1640	} else {
1641#ifdef CONFIG_NET_CLS_ACT
1642		if (cl->police == TC_POLICE_RECLASSIFY)
1643			new->reshape_fail = cbq_reshape_fail;
1644#endif
1645	}
1646	sch_tree_lock(sch);
1647	*old = cl->q;
1648	cl->q = new;
1649	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1650	qdisc_reset(*old);
1651	sch_tree_unlock(sch);
1652
1653	return 0;
1654}
1655
1656static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1657{
1658	struct cbq_class *cl = (struct cbq_class *)arg;
1659
1660	return cl->q;
1661}
1662
1663static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1664{
1665	struct cbq_class *cl = (struct cbq_class *)arg;
1666
1667	if (cl->q->q.qlen == 0)
1668		cbq_deactivate_class(cl);
1669}
1670
1671static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1672{
1673	struct cbq_sched_data *q = qdisc_priv(sch);
1674	struct cbq_class *cl = cbq_class_lookup(q, classid);
1675
1676	if (cl) {
1677		cl->refcnt++;
1678		return (unsigned long)cl;
1679	}
1680	return 0;
1681}
1682
1683static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1684{
1685	struct cbq_sched_data *q = qdisc_priv(sch);
1686
1687	WARN_ON(cl->filters);
1688
1689	tcf_destroy_chain(&cl->filter_list);
1690	qdisc_destroy(cl->q);
1691	qdisc_put_rtab(cl->R_tab);
1692	gen_kill_estimator(&cl->bstats, &cl->rate_est);
1693	if (cl != &q->link)
1694		kfree(cl);
1695}
1696
1697static void cbq_destroy(struct Qdisc *sch)
1698{
1699	struct cbq_sched_data *q = qdisc_priv(sch);
1700	struct hlist_node *next;
1701	struct cbq_class *cl;
1702	unsigned int h;
1703
1704#ifdef CONFIG_NET_CLS_ACT
1705	q->rx_class = NULL;
1706#endif
1707	/*
1708	 * Filters must be destroyed first because we don't destroy the
1709	 * classes from root to leafs which means that filters can still
1710	 * be bound to classes which have been destroyed already. --TGR '04
1711	 */
1712	for (h = 0; h < q->clhash.hashsize; h++) {
1713		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
1714			tcf_destroy_chain(&cl->filter_list);
1715	}
1716	for (h = 0; h < q->clhash.hashsize; h++) {
1717		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1718					  common.hnode)
1719			cbq_destroy_class(sch, cl);
1720	}
1721	qdisc_class_hash_destroy(&q->clhash);
1722}
1723
1724static void cbq_put(struct Qdisc *sch, unsigned long arg)
1725{
1726	struct cbq_class *cl = (struct cbq_class *)arg;
1727
1728	if (--cl->refcnt == 0) {
1729#ifdef CONFIG_NET_CLS_ACT
1730		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1731		struct cbq_sched_data *q = qdisc_priv(sch);
1732
1733		spin_lock_bh(root_lock);
1734		if (q->rx_class == cl)
1735			q->rx_class = NULL;
1736		spin_unlock_bh(root_lock);
1737#endif
1738
1739		cbq_destroy_class(sch, cl);
1740	}
1741}
1742
1743static int
1744cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1745		 unsigned long *arg)
1746{
1747	int err;
1748	struct cbq_sched_data *q = qdisc_priv(sch);
1749	struct cbq_class *cl = (struct cbq_class *)*arg;
1750	struct nlattr *opt = tca[TCA_OPTIONS];
1751	struct nlattr *tb[TCA_CBQ_MAX + 1];
1752	struct cbq_class *parent;
1753	struct qdisc_rate_table *rtab = NULL;
1754
1755	if (opt == NULL)
1756		return -EINVAL;
1757
1758	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1759	if (err < 0)
1760		return err;
1761
1762	if (cl) {
1763		/* Check parent */
1764		if (parentid) {
1765			if (cl->tparent &&
1766			    cl->tparent->common.classid != parentid)
1767				return -EINVAL;
1768			if (!cl->tparent && parentid != TC_H_ROOT)
1769				return -EINVAL;
1770		}
1771
1772		if (tb[TCA_CBQ_RATE]) {
1773			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1774					      tb[TCA_CBQ_RTAB]);
1775			if (rtab == NULL)
1776				return -EINVAL;
1777		}
1778
1779		if (tca[TCA_RATE]) {
1780			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1781						    qdisc_root_sleeping_lock(sch),
1782						    tca[TCA_RATE]);
1783			if (err) {
1784				qdisc_put_rtab(rtab);
 
1785				return err;
1786			}
1787		}
1788
1789		/* Change class parameters */
1790		sch_tree_lock(sch);
1791
1792		if (cl->next_alive != NULL)
1793			cbq_deactivate_class(cl);
1794
1795		if (rtab) {
1796			qdisc_put_rtab(cl->R_tab);
1797			cl->R_tab = rtab;
1798		}
1799
1800		if (tb[TCA_CBQ_LSSOPT])
1801			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1802
1803		if (tb[TCA_CBQ_WRROPT]) {
1804			cbq_rmprio(q, cl);
1805			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1806		}
1807
1808		if (tb[TCA_CBQ_OVL_STRATEGY])
1809			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1810
1811#ifdef CONFIG_NET_CLS_ACT
1812		if (tb[TCA_CBQ_POLICE])
1813			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1814#endif
1815
1816		if (tb[TCA_CBQ_FOPT])
1817			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1818
1819		if (cl->q->q.qlen)
1820			cbq_activate_class(cl);
1821
1822		sch_tree_unlock(sch);
1823
1824		return 0;
1825	}
1826
1827	if (parentid == TC_H_ROOT)
1828		return -EINVAL;
1829
1830	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1831	    tb[TCA_CBQ_LSSOPT] == NULL)
1832		return -EINVAL;
1833
1834	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1835	if (rtab == NULL)
1836		return -EINVAL;
1837
1838	if (classid) {
1839		err = -EINVAL;
1840		if (TC_H_MAJ(classid ^ sch->handle) ||
1841		    cbq_class_lookup(q, classid))
1842			goto failure;
1843	} else {
1844		int i;
1845		classid = TC_H_MAKE(sch->handle, 0x8000);
1846
1847		for (i = 0; i < 0x8000; i++) {
1848			if (++q->hgenerator >= 0x8000)
1849				q->hgenerator = 1;
1850			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1851				break;
1852		}
1853		err = -ENOSR;
1854		if (i >= 0x8000)
1855			goto failure;
1856		classid = classid|q->hgenerator;
1857	}
1858
1859	parent = &q->link;
1860	if (parentid) {
1861		parent = cbq_class_lookup(q, parentid);
1862		err = -EINVAL;
1863		if (parent == NULL)
1864			goto failure;
1865	}
1866
1867	err = -ENOBUFS;
1868	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1869	if (cl == NULL)
1870		goto failure;
1871
1872	if (tca[TCA_RATE]) {
1873		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1874					qdisc_root_sleeping_lock(sch),
1875					tca[TCA_RATE]);
1876		if (err) {
1877			kfree(cl);
1878			goto failure;
1879		}
1880	}
1881
1882	cl->R_tab = rtab;
1883	rtab = NULL;
1884	cl->refcnt = 1;
1885	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1886	if (!cl->q)
1887		cl->q = &noop_qdisc;
1888	cl->common.classid = classid;
1889	cl->tparent = parent;
1890	cl->qdisc = sch;
1891	cl->allot = parent->allot;
1892	cl->quantum = cl->allot;
1893	cl->weight = cl->R_tab->rate.rate;
1894
1895	sch_tree_lock(sch);
1896	cbq_link_class(cl);
1897	cl->borrow = cl->tparent;
1898	if (cl->tparent != &q->link)
1899		cl->share = cl->tparent;
1900	cbq_adjust_levels(parent);
1901	cl->minidle = -0x7FFFFFFF;
1902	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1903	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1904	if (cl->ewma_log == 0)
1905		cl->ewma_log = q->link.ewma_log;
1906	if (cl->maxidle == 0)
1907		cl->maxidle = q->link.maxidle;
1908	if (cl->avpkt == 0)
1909		cl->avpkt = q->link.avpkt;
1910	cl->overlimit = cbq_ovl_classic;
1911	if (tb[TCA_CBQ_OVL_STRATEGY])
1912		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1913#ifdef CONFIG_NET_CLS_ACT
1914	if (tb[TCA_CBQ_POLICE])
1915		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1916#endif
1917	if (tb[TCA_CBQ_FOPT])
1918		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1919	sch_tree_unlock(sch);
1920
1921	qdisc_class_hash_grow(sch, &q->clhash);
1922
1923	*arg = (unsigned long)cl;
1924	return 0;
1925
1926failure:
1927	qdisc_put_rtab(rtab);
1928	return err;
1929}
1930
1931static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1932{
1933	struct cbq_sched_data *q = qdisc_priv(sch);
1934	struct cbq_class *cl = (struct cbq_class *)arg;
1935	unsigned int qlen;
1936
1937	if (cl->filters || cl->children || cl == &q->link)
1938		return -EBUSY;
1939
1940	sch_tree_lock(sch);
1941
1942	qlen = cl->q->q.qlen;
1943	qdisc_reset(cl->q);
1944	qdisc_tree_decrease_qlen(cl->q, qlen);
1945
1946	if (cl->next_alive)
1947		cbq_deactivate_class(cl);
1948
1949	if (q->tx_borrowed == cl)
1950		q->tx_borrowed = q->tx_class;
1951	if (q->tx_class == cl) {
1952		q->tx_class = NULL;
1953		q->tx_borrowed = NULL;
1954	}
1955#ifdef CONFIG_NET_CLS_ACT
1956	if (q->rx_class == cl)
1957		q->rx_class = NULL;
1958#endif
1959
1960	cbq_unlink_class(cl);
1961	cbq_adjust_levels(cl->tparent);
1962	cl->defmap = 0;
1963	cbq_sync_defmap(cl);
1964
1965	cbq_rmprio(q, cl);
1966	sch_tree_unlock(sch);
1967
1968	BUG_ON(--cl->refcnt == 0);
1969	/*
1970	 * This shouldn't happen: we "hold" one cops->get() when called
1971	 * from tc_ctl_tclass; the destroy method is done from cops->put().
1972	 */
1973
1974	return 0;
1975}
1976
1977static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1978{
1979	struct cbq_sched_data *q = qdisc_priv(sch);
1980	struct cbq_class *cl = (struct cbq_class *)arg;
1981
1982	if (cl == NULL)
1983		cl = &q->link;
1984
1985	return &cl->filter_list;
1986}
1987
1988static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1989				     u32 classid)
1990{
1991	struct cbq_sched_data *q = qdisc_priv(sch);
1992	struct cbq_class *p = (struct cbq_class *)parent;
1993	struct cbq_class *cl = cbq_class_lookup(q, classid);
1994
1995	if (cl) {
1996		if (p && p->level <= cl->level)
1997			return 0;
1998		cl->filters++;
1999		return (unsigned long)cl;
2000	}
2001	return 0;
2002}
2003
2004static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2005{
2006	struct cbq_class *cl = (struct cbq_class *)arg;
2007
2008	cl->filters--;
2009}
2010
2011static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2012{
2013	struct cbq_sched_data *q = qdisc_priv(sch);
2014	struct cbq_class *cl;
 
2015	unsigned int h;
2016
2017	if (arg->stop)
2018		return;
2019
2020	for (h = 0; h < q->clhash.hashsize; h++) {
2021		hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
2022			if (arg->count < arg->skip) {
2023				arg->count++;
2024				continue;
2025			}
2026			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2027				arg->stop = 1;
2028				return;
2029			}
2030			arg->count++;
2031		}
2032	}
2033}
2034
2035static const struct Qdisc_class_ops cbq_class_ops = {
2036	.graft		=	cbq_graft,
2037	.leaf		=	cbq_leaf,
2038	.qlen_notify	=	cbq_qlen_notify,
2039	.get		=	cbq_get,
2040	.put		=	cbq_put,
2041	.change		=	cbq_change_class,
2042	.delete		=	cbq_delete,
2043	.walk		=	cbq_walk,
2044	.tcf_chain	=	cbq_find_tcf,
2045	.bind_tcf	=	cbq_bind_filter,
2046	.unbind_tcf	=	cbq_unbind_filter,
2047	.dump		=	cbq_dump_class,
2048	.dump_stats	=	cbq_dump_class_stats,
2049};
2050
2051static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2052	.next		=	NULL,
2053	.cl_ops		=	&cbq_class_ops,
2054	.id		=	"cbq",
2055	.priv_size	=	sizeof(struct cbq_sched_data),
2056	.enqueue	=	cbq_enqueue,
2057	.dequeue	=	cbq_dequeue,
2058	.peek		=	qdisc_peek_dequeued,
2059	.drop		=	cbq_drop,
2060	.init		=	cbq_init,
2061	.reset		=	cbq_reset,
2062	.destroy	=	cbq_destroy,
2063	.change		=	NULL,
2064	.dump		=	cbq_dump,
2065	.dump_stats	=	cbq_dump_stats,
2066	.owner		=	THIS_MODULE,
2067};
2068
2069static int __init cbq_module_init(void)
2070{
2071	return register_qdisc(&cbq_qdisc_ops);
2072}
2073static void __exit cbq_module_exit(void)
2074{
2075	unregister_qdisc(&cbq_qdisc_ops);
2076}
2077module_init(cbq_module_init)
2078module_exit(cbq_module_exit)
2079MODULE_LICENSE("GPL");

   1/*
   2 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
   3 *
   4 *		This program is free software; you can redistribute it and/or
   5 *		modify it under the terms of the GNU General Public License
   6 *		as published by the Free Software Foundation; either version
   7 *		2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 *
  11 */
  12
  13#include <linux/module.h>
  14#include <linux/slab.h>
  15#include <linux/types.h>
  16#include <linux/kernel.h>
  17#include <linux/string.h>
  18#include <linux/errno.h>
  19#include <linux/skbuff.h>
  20#include <net/netlink.h>
  21#include <net/pkt_sched.h>
  22
  23
  24/*	Class-Based Queueing (CBQ) algorithm.
  25	=======================================
  26
  27	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  28		 Management Models for Packet Networks",
  29		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  30
  31		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  32
  33		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  34		 Parameters", 1996
  35
  36		 [4] Sally Floyd and Michael Speer, "Experimental Results
  37		 for Class-Based Queueing", 1998, not published.
  38
  39	-----------------------------------------------------------------------
  40
  41	Algorithm skeleton was taken from NS simulator cbq.cc.
  42	If someone wants to check this code against the LBL version,
  43	he should take into account that ONLY the skeleton was borrowed,
  44	the implementation is different. Particularly:
  45
  46	--- The WRR algorithm is different. Our version looks more
  47	reasonable (I hope) and works when quanta are allowed to be
  48	less than MTU, which is always the case when real time classes
  49	have small rates. Note, that the statement of [3] is
  50	incomplete, delay may actually be estimated even if class
  51	per-round allotment is less than MTU. Namely, if per-round
  52	allotment is W*r_i, and r_1+...+r_k = r < 1
  53
  54	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  55
  56	In the worst case we have IntServ estimate with D = W*r+k*MTU
  57	and C = MTU*r. The proof (if correct at all) is trivial.
  58
  59
  60	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
  61	interpret some places, which look like wrong translations
  62	from NS. Anyone is advised to find these differences
  63	and explain to me, why I am wrong 8).
  64
  65	--- Linux has no EOI event, so that we cannot estimate true class
  66	idle time. Workaround is to consider the next dequeue event
  67	as sign that previous packet is finished. This is wrong because of
  68	internal device queueing, but on a permanently loaded link it is true.
  69	Moreover, combined with clock integrator, this scheme looks
  70	very close to an ideal solution.  */
  71
  72struct cbq_sched_data;
  73
  74
  75struct cbq_class {
  76	struct Qdisc_class_common common;
  77	struct cbq_class	*next_alive;	/* next class with backlog in this priority band */
  78
  79/* Parameters */
  80	unsigned char		priority;	/* class priority */
  81	unsigned char		priority2;	/* priority to be used after overlimit */
  82	unsigned char		ewma_log;	/* time constant for idle time calculation */
  83	unsigned char		ovl_strategy;
  84#ifdef CONFIG_NET_CLS_ACT
  85	unsigned char		police;
  86#endif
  87
  88	u32			defmap;
  89
  90	/* Link-sharing scheduler parameters */
  91	long			maxidle;	/* Class parameters: see below. */
  92	long			offtime;
  93	long			minidle;
  94	u32			avpkt;
  95	struct qdisc_rate_table	*R_tab;
  96
  97	/* Overlimit strategy parameters */
  98	void			(*overlimit)(struct cbq_class *cl);
  99	psched_tdiff_t		penalty;
 100
 101	/* General scheduler (WRR) parameters */
 102	long			allot;
 103	long			quantum;	/* Allotment per WRR round */
 104	long			weight;		/* Relative allotment: see below */
 105
 106	struct Qdisc		*qdisc;		/* Ptr to CBQ discipline */
 107	struct cbq_class	*split;		/* Ptr to split node */
 108	struct cbq_class	*share;		/* Ptr to LS parent in the class tree */
 109	struct cbq_class	*tparent;	/* Ptr to tree parent in the class tree */
 110	struct cbq_class	*borrow;	/* NULL if class is bandwidth limited;
 111						   parent otherwise */
 112	struct cbq_class	*sibling;	/* Sibling chain */
 113	struct cbq_class	*children;	/* Pointer to children chain */
 114
 115	struct Qdisc		*q;		/* Elementary queueing discipline */
 116
 117
 118/* Variables */
 119	unsigned char		cpriority;	/* Effective priority */
 120	unsigned char		delayed;
 121	unsigned char		level;		/* level of the class in hierarchy:
 122						   0 for leaf classes, and maximal
 123						   level of children + 1 for nodes.
 124						 */
 125
 126	psched_time_t		last;		/* Last end of service */
 127	psched_time_t		undertime;
 128	long			avgidle;
 129	long			deficit;	/* Saved deficit for WRR */
 130	psched_time_t		penalized;
 131	struct gnet_stats_basic_packed bstats;
 132	struct gnet_stats_queue qstats;
 133	struct gnet_stats_rate_est rate_est;
 134	struct tc_cbq_xstats	xstats;
 135
 136	struct tcf_proto	*filter_list;
 137
 138	int			refcnt;
 139	int			filters;
 140
 141	struct cbq_class	*defaults[TC_PRIO_MAX + 1];
 142};
 143
 144struct cbq_sched_data {
 145	struct Qdisc_class_hash	clhash;			/* Hash table of all classes */
 146	int			nclasses[TC_CBQ_MAXPRIO + 1];
 147	unsigned int		quanta[TC_CBQ_MAXPRIO + 1];
 148
 149	struct cbq_class	link;
 150
 151	unsigned int		activemask;
 152	struct cbq_class	*active[TC_CBQ_MAXPRIO + 1];	/* List of all classes
 153								   with backlog */
 154
 155#ifdef CONFIG_NET_CLS_ACT
 156	struct cbq_class	*rx_class;
 157#endif
 158	struct cbq_class	*tx_class;
 159	struct cbq_class	*tx_borrowed;
 160	int			tx_len;
 161	psched_time_t		now;		/* Cached timestamp */
 162	psched_time_t		now_rt;		/* Cached real time */
 163	unsigned int		pmask;
 164
 165	struct hrtimer		delay_timer;
 166	struct qdisc_watchdog	watchdog;	/* Watchdog timer,
 167						   started when CBQ has
 168						   backlog, but cannot
 169						   transmit just now */
 170	psched_tdiff_t		wd_expires;
 171	int			toplevel;
 172	u32			hgenerator;
 173};
 174
 175
 176#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, len)
 177
 178static inline struct cbq_class *
 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 180{
 181	struct Qdisc_class_common *clc;
 182
 183	clc = qdisc_class_find(&q->clhash, classid);
 184	if (clc == NULL)
 185		return NULL;
 186	return container_of(clc, struct cbq_class, common);
 187}
 188
 189#ifdef CONFIG_NET_CLS_ACT
 190
 191static struct cbq_class *
 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 193{
 194	struct cbq_class *cl;
 195
 196	for (cl = this->tparent; cl; cl = cl->tparent) {
 197		struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
 198
 199		if (new != NULL && new != this)
 200			return new;
 201	}
 202	return NULL;
 203}
 204
 205#endif
 206
 207/* Classify packet. The procedure is pretty complicated, but
 208 * it allows us to combine link sharing and priority scheduling
 209 * transparently.
 210 *
 211 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 212 * so that it resolves to split nodes. Then packets are classified
 213 * by logical priority, or a more specific classifier may be attached
 214 * to the split node.
 215 */
 216
 217static struct cbq_class *
 218cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 219{
 220	struct cbq_sched_data *q = qdisc_priv(sch);
 221	struct cbq_class *head = &q->link;
 222	struct cbq_class **defmap;
 223	struct cbq_class *cl = NULL;
 224	u32 prio = skb->priority;
 225	struct tcf_result res;
 226
 227	/*
 228	 *  Step 1. If skb->priority points to one of our classes, use it.
 229	 */
 230	if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
 231	    (cl = cbq_class_lookup(q, prio)) != NULL)
 232		return cl;
 233
 234	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235	for (;;) {
 236		int result = 0;
 237		defmap = head->defaults;
 238
 239		/*
 240		 * Step 2+n. Apply classifier.
 241		 */
 242		if (!head->filter_list ||
 243		    (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
 244			goto fallback;
 245
 246		cl = (void *)res.class;
 247		if (!cl) {
 248			if (TC_H_MAJ(res.classid))
 249				cl = cbq_class_lookup(q, res.classid);
 250			else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
 251				cl = defmap[TC_PRIO_BESTEFFORT];
 252
 253			if (cl == NULL || cl->level >= head->level)
 254				goto fallback;
 255		}
 256
 
 257#ifdef CONFIG_NET_CLS_ACT
 258		switch (result) {
 259		case TC_ACT_QUEUED:
 260		case TC_ACT_STOLEN:
 261			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 262		case TC_ACT_SHOT:
 263			return NULL;
 264		case TC_ACT_RECLASSIFY:
 265			return cbq_reclassify(skb, cl);
 266		}
 267#endif
 268		if (cl->level == 0)
 269			return cl;
 270
 271		/*
 272		 * Step 3+n. If classifier selected a link sharing class,
 273		 *	   apply agency specific classifier.
 274		 *	   Repeat this procdure until we hit a leaf node.
 275		 */
 276		head = cl;
 277	}
 278
 279fallback:
 280	cl = head;
 281
 282	/*
 283	 * Step 4. No success...
 284	 */
 285	if (TC_H_MAJ(prio) == 0 &&
 286	    !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
 287	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 288		return head;
 289
 290	return cl;
 291}
 292
 293/*
 294 * A packet has just been enqueued on the empty class.
 295 * cbq_activate_class adds it to the tail of active class list
 296 * of its priority band.
 297 */
 298
 299static inline void cbq_activate_class(struct cbq_class *cl)
 300{
 301	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 302	int prio = cl->cpriority;
 303	struct cbq_class *cl_tail;
 304
 305	cl_tail = q->active[prio];
 306	q->active[prio] = cl;
 307
 308	if (cl_tail != NULL) {
 309		cl->next_alive = cl_tail->next_alive;
 310		cl_tail->next_alive = cl;
 311	} else {
 312		cl->next_alive = cl;
 313		q->activemask |= (1<<prio);
 314	}
 315}
 316
 317/*
 318 * Unlink class from active chain.
 319 * Note that this same procedure is done directly in cbq_dequeue*
 320 * during round-robin procedure.
 321 */
 322
 323static void cbq_deactivate_class(struct cbq_class *this)
 324{
 325	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 326	int prio = this->cpriority;
 327	struct cbq_class *cl;
 328	struct cbq_class *cl_prev = q->active[prio];
 329
 330	do {
 331		cl = cl_prev->next_alive;
 332		if (cl == this) {
 333			cl_prev->next_alive = cl->next_alive;
 334			cl->next_alive = NULL;
 335
 336			if (cl == q->active[prio]) {
 337				q->active[prio] = cl_prev;
 338				if (cl == q->active[prio]) {
 339					q->active[prio] = NULL;
 340					q->activemask &= ~(1<<prio);
 341					return;
 342				}
 343			}
 344			return;
 345		}
 346	} while ((cl_prev = cl) != q->active[prio]);
 347}
 348
 349static void
 350cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 351{
 352	int toplevel = q->toplevel;
 353
 354	if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
 355		psched_time_t now;
 356		psched_tdiff_t incr;
 357
 358		now = psched_get_time();
 359		incr = now - q->now_rt;
 360		now = q->now + incr;
 361
 362		do {
 363			if (cl->undertime < now) {
 364				q->toplevel = cl->level;
 365				return;
 366			}
 367		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);
 368	}
 369}
 370
 371static int
 372cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 373{
 374	struct cbq_sched_data *q = qdisc_priv(sch);
 375	int uninitialized_var(ret);
 376	struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 377
 378#ifdef CONFIG_NET_CLS_ACT
 379	q->rx_class = cl;
 380#endif
 381	if (cl == NULL) {
 382		if (ret & __NET_XMIT_BYPASS)
 383			sch->qstats.drops++;
 384		kfree_skb(skb);
 385		return ret;
 386	}
 387
 388#ifdef CONFIG_NET_CLS_ACT
 389	cl->q->__parent = sch;
 390#endif
 391	ret = qdisc_enqueue(skb, cl->q);
 392	if (ret == NET_XMIT_SUCCESS) {
 393		sch->q.qlen++;
 394		cbq_mark_toplevel(q, cl);
 395		if (!cl->next_alive)
 396			cbq_activate_class(cl);
 397		return ret;
 398	}
 399
 400	if (net_xmit_drop_count(ret)) {
 401		sch->qstats.drops++;
 402		cbq_mark_toplevel(q, cl);
 403		cl->qstats.drops++;
 404	}
 405	return ret;
 406}
 407
 408/* Overlimit actions */
 409
 410/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 411
 412static void cbq_ovl_classic(struct cbq_class *cl)
 413{
 414	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 415	psched_tdiff_t delay = cl->undertime - q->now;
 416
 417	if (!cl->delayed) {
 418		delay += cl->offtime;
 419
 420		/*
 421		 * Class goes to sleep, so that it will have no
 422		 * chance to work avgidle. Let's forgive it 8)
 423		 *
 424		 * BTW cbq-2.0 has a crap in this
 425		 * place, apparently they forgot to shift it by cl->ewma_log.
 426		 */
 427		if (cl->avgidle < 0)
 428			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 429		if (cl->avgidle < cl->minidle)
 430			cl->avgidle = cl->minidle;
 431		if (delay <= 0)
 432			delay = 1;
 433		cl->undertime = q->now + delay;
 434
 435		cl->xstats.overactions++;
 436		cl->delayed = 1;
 437	}
 438	if (q->wd_expires == 0 || q->wd_expires > delay)
 439		q->wd_expires = delay;
 440
 441	/* Dirty work! We must schedule wakeups based on
 442	 * real available rate, rather than leaf rate,
 443	 * which may be tiny (even zero).
 444	 */
 445	if (q->toplevel == TC_CBQ_MAXLEVEL) {
 446		struct cbq_class *b;
 447		psched_tdiff_t base_delay = q->wd_expires;
 448
 449		for (b = cl->borrow; b; b = b->borrow) {
 450			delay = b->undertime - q->now;
 451			if (delay < base_delay) {
 452				if (delay <= 0)
 453					delay = 1;
 454				base_delay = delay;
 455			}
 456		}
 457
 458		q->wd_expires = base_delay;
 459	}
 460}
 461
 462/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 463 * they go overlimit
 464 */
 465
 466static void cbq_ovl_rclassic(struct cbq_class *cl)
 467{
 468	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 469	struct cbq_class *this = cl;
 470
 471	do {
 472		if (cl->level > q->toplevel) {
 473			cl = NULL;
 474			break;
 475		}
 476	} while ((cl = cl->borrow) != NULL);
 477
 478	if (cl == NULL)
 479		cl = this;
 480	cbq_ovl_classic(cl);
 481}
 482
 483/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 484
 485static void cbq_ovl_delay(struct cbq_class *cl)
 486{
 487	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 488	psched_tdiff_t delay = cl->undertime - q->now;
 489
 490	if (test_bit(__QDISC_STATE_DEACTIVATED,
 491		     &qdisc_root_sleeping(cl->qdisc)->state))
 492		return;
 493
 494	if (!cl->delayed) {
 495		psched_time_t sched = q->now;
 496		ktime_t expires;
 497
 498		delay += cl->offtime;
 499		if (cl->avgidle < 0)
 500			delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 501		if (cl->avgidle < cl->minidle)
 502			cl->avgidle = cl->minidle;
 503		cl->undertime = q->now + delay;
 504
 505		if (delay > 0) {
 506			sched += delay + cl->penalty;
 507			cl->penalized = sched;
 508			cl->cpriority = TC_CBQ_MAXPRIO;
 509			q->pmask |= (1<<TC_CBQ_MAXPRIO);
 510
 511			expires = ktime_set(0, 0);
 512			expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched));
 513			if (hrtimer_try_to_cancel(&q->delay_timer) &&
 514			    ktime_to_ns(ktime_sub(
 515					hrtimer_get_expires(&q->delay_timer),
 516					expires)) > 0)
 517				hrtimer_set_expires(&q->delay_timer, expires);
 518			hrtimer_restart(&q->delay_timer);
 519			cl->delayed = 1;
 520			cl->xstats.overactions++;
 521			return;
 522		}
 523		delay = 1;
 524	}
 525	if (q->wd_expires == 0 || q->wd_expires > delay)
 526		q->wd_expires = delay;
 527}
 528
 529/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 530
 531static void cbq_ovl_lowprio(struct cbq_class *cl)
 532{
 533	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 534
 535	cl->penalized = q->now + cl->penalty;
 536
 537	if (cl->cpriority != cl->priority2) {
 538		cl->cpriority = cl->priority2;
 539		q->pmask |= (1<<cl->cpriority);
 540		cl->xstats.overactions++;
 541	}
 542	cbq_ovl_classic(cl);
 543}
 544
 545/* TC_CBQ_OVL_DROP: penalize class by dropping */
 546
 547static void cbq_ovl_drop(struct cbq_class *cl)
 548{
 549	if (cl->q->ops->drop)
 550		if (cl->q->ops->drop(cl->q))
 551			cl->qdisc->q.qlen--;
 552	cl->xstats.overactions++;
 553	cbq_ovl_classic(cl);
 554}
 555
 556static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
 557				       psched_time_t now)
 558{
 559	struct cbq_class *cl;
 560	struct cbq_class *cl_prev = q->active[prio];
 561	psched_time_t sched = now;
 562
 563	if (cl_prev == NULL)
 564		return 0;
 565
 566	do {
 567		cl = cl_prev->next_alive;
 568		if (now - cl->penalized > 0) {
 569			cl_prev->next_alive = cl->next_alive;
 570			cl->next_alive = NULL;
 571			cl->cpriority = cl->priority;
 572			cl->delayed = 0;
 573			cbq_activate_class(cl);
 574
 575			if (cl == q->active[prio]) {
 576				q->active[prio] = cl_prev;
 577				if (cl == q->active[prio]) {
 578					q->active[prio] = NULL;
 579					return 0;
 580				}
 581			}
 582
 583			cl = cl_prev->next_alive;
 584		} else if (sched - cl->penalized > 0)
 585			sched = cl->penalized;
 586	} while ((cl_prev = cl) != q->active[prio]);
 587
 588	return sched - now;
 589}
 590
 591static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 592{
 593	struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
 594						delay_timer);
 595	struct Qdisc *sch = q->watchdog.qdisc;
 596	psched_time_t now;
 597	psched_tdiff_t delay = 0;
 598	unsigned int pmask;
 599
 600	now = psched_get_time();
 601
 602	pmask = q->pmask;
 603	q->pmask = 0;
 604
 605	while (pmask) {
 606		int prio = ffz(~pmask);
 607		psched_tdiff_t tmp;
 608
 609		pmask &= ~(1<<prio);
 610
 611		tmp = cbq_undelay_prio(q, prio, now);
 612		if (tmp > 0) {
 613			q->pmask |= 1<<prio;
 614			if (tmp < delay || delay == 0)
 615				delay = tmp;
 616		}
 617	}
 618
 619	if (delay) {
 620		ktime_t time;
 621
 622		time = ktime_set(0, 0);
 623		time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
 624		hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
 625	}
 626
 627	qdisc_unthrottled(sch);
 628	__netif_schedule(qdisc_root(sch));
 629	return HRTIMER_NORESTART;
 630}
 631
 632#ifdef CONFIG_NET_CLS_ACT
 633static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 634{
 635	struct Qdisc *sch = child->__parent;
 636	struct cbq_sched_data *q = qdisc_priv(sch);
 637	struct cbq_class *cl = q->rx_class;
 638
 639	q->rx_class = NULL;
 640
 641	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 642		int ret;
 643
 644		cbq_mark_toplevel(q, cl);
 645
 646		q->rx_class = cl;
 647		cl->q->__parent = sch;
 648
 649		ret = qdisc_enqueue(skb, cl->q);
 650		if (ret == NET_XMIT_SUCCESS) {
 651			sch->q.qlen++;
 652			if (!cl->next_alive)
 653				cbq_activate_class(cl);
 654			return 0;
 655		}
 656		if (net_xmit_drop_count(ret))
 657			sch->qstats.drops++;
 658		return 0;
 659	}
 660
 661	sch->qstats.drops++;
 662	return -1;
 663}
 664#endif
 665
 666/*
 667 * It is mission critical procedure.
 668 *
 669 * We "regenerate" toplevel cutoff, if transmitting class
 670 * has backlog and it is not regulated. It is not part of
 671 * original CBQ description, but looks more reasonable.
 672 * Probably, it is wrong. This question needs further investigation.
 673 */
 674
 675static inline void
 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 677		    struct cbq_class *borrowed)
 678{
 679	if (cl && q->toplevel >= borrowed->level) {
 680		if (cl->q->q.qlen > 1) {
 681			do {
 682				if (borrowed->undertime == PSCHED_PASTPERFECT) {
 683					q->toplevel = borrowed->level;
 684					return;
 685				}
 686			} while ((borrowed = borrowed->borrow) != NULL);
 687		}
 688#if 0
 689	/* It is not necessary now. Uncommenting it
 690	   will save CPU cycles, but decrease fairness.
 691	 */
 692		q->toplevel = TC_CBQ_MAXLEVEL;
 693#endif
 694	}
 695}
 696
 697static void
 698cbq_update(struct cbq_sched_data *q)
 699{
 700	struct cbq_class *this = q->tx_class;
 701	struct cbq_class *cl = this;
 702	int len = q->tx_len;
 703
 704	q->tx_class = NULL;
 705
 706	for ( ; cl; cl = cl->share) {
 707		long avgidle = cl->avgidle;
 708		long idle;
 709
 710		cl->bstats.packets++;
 711		cl->bstats.bytes += len;
 712
 713		/*
 714		 * (now - last) is total time between packet right edges.
 715		 * (last_pktlen/rate) is "virtual" busy time, so that
 716		 *
 717		 *	idle = (now - last) - last_pktlen/rate
 718		 */
 719
 720		idle = q->now - cl->last;
 721		if ((unsigned long)idle > 128*1024*1024) {
 722			avgidle = cl->maxidle;
 723		} else {
 724			idle -= L2T(cl, len);
 725
 726		/* true_avgidle := (1-W)*true_avgidle + W*idle,
 727		 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
 728		 * cl->avgidle == true_avgidle/W,
 729		 * hence:
 730		 */
 731			avgidle += idle - (avgidle>>cl->ewma_log);
 732		}
 733
 734		if (avgidle <= 0) {
 735			/* Overlimit or at-limit */
 736
 737			if (avgidle < cl->minidle)
 738				avgidle = cl->minidle;
 739
 740			cl->avgidle = avgidle;
 741
 742			/* Calculate expected time, when this class
 743			 * will be allowed to send.
 744			 * It will occur, when:
 745			 * (1-W)*true_avgidle + W*delay = 0, i.e.
 746			 * idle = (1/W - 1)*(-true_avgidle)
 747			 * or
 748			 * idle = (1 - W)*(-cl->avgidle);
 749			 */
 750			idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 751
 752			/*
 753			 * That is not all.
 754			 * To maintain the rate allocated to the class,
 755			 * we add to undertime virtual clock,
 756			 * necessary to complete transmitted packet.
 757			 * (len/phys_bandwidth has been already passed
 758			 * to the moment of cbq_update)
 759			 */
 760
 761			idle -= L2T(&q->link, len);
 762			idle += L2T(cl, len);
 763
 764			cl->undertime = q->now + idle;
 765		} else {
 766			/* Underlimit */
 767
 768			cl->undertime = PSCHED_PASTPERFECT;
 769			if (avgidle > cl->maxidle)
 770				cl->avgidle = cl->maxidle;
 771			else
 772				cl->avgidle = avgidle;
 773		}
 774		cl->last = q->now;
 775	}
 776
 777	cbq_update_toplevel(q, this, q->tx_borrowed);
 778}
 779
 780static inline struct cbq_class *
 781cbq_under_limit(struct cbq_class *cl)
 782{
 783	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 784	struct cbq_class *this_cl = cl;
 785
 786	if (cl->tparent == NULL)
 787		return cl;
 788
 789	if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
 790		cl->delayed = 0;
 791		return cl;
 792	}
 793
 794	do {
 795		/* It is very suspicious place. Now overlimit
 796		 * action is generated for not bounded classes
 797		 * only if link is completely congested.
 798		 * Though it is in agree with ancestor-only paradigm,
 799		 * it looks very stupid. Particularly,
 800		 * it means that this chunk of code will either
 801		 * never be called or result in strong amplification
 802		 * of burstiness. Dangerous, silly, and, however,
 803		 * no another solution exists.
 804		 */
 805		cl = cl->borrow;
 806		if (!cl) {
 807			this_cl->qstats.overlimits++;
 808			this_cl->overlimit(this_cl);
 809			return NULL;
 810		}
 811		if (cl->level > q->toplevel)
 812			return NULL;
 813	} while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
 814
 815	cl->delayed = 0;
 816	return cl;
 817}
 818
 819static inline struct sk_buff *
 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
 821{
 822	struct cbq_sched_data *q = qdisc_priv(sch);
 823	struct cbq_class *cl_tail, *cl_prev, *cl;
 824	struct sk_buff *skb;
 825	int deficit;
 826
 827	cl_tail = cl_prev = q->active[prio];
 828	cl = cl_prev->next_alive;
 829
 830	do {
 831		deficit = 0;
 832
 833		/* Start round */
 834		do {
 835			struct cbq_class *borrow = cl;
 836
 837			if (cl->q->q.qlen &&
 838			    (borrow = cbq_under_limit(cl)) == NULL)
 839				goto skip_class;
 840
 841			if (cl->deficit <= 0) {
 842				/* Class exhausted its allotment per
 843				 * this round. Switch to the next one.
 844				 */
 845				deficit = 1;
 846				cl->deficit += cl->quantum;
 847				goto next_class;
 848			}
 849
 850			skb = cl->q->dequeue(cl->q);
 851
 852			/* Class did not give us any skb :-(
 853			 * It could occur even if cl->q->q.qlen != 0
 854			 * f.e. if cl->q == "tbf"
 855			 */
 856			if (skb == NULL)
 857				goto skip_class;
 858
 859			cl->deficit -= qdisc_pkt_len(skb);
 860			q->tx_class = cl;
 861			q->tx_borrowed = borrow;
 862			if (borrow != cl) {
 863#ifndef CBQ_XSTATS_BORROWS_BYTES
 864				borrow->xstats.borrows++;
 865				cl->xstats.borrows++;
 866#else
 867				borrow->xstats.borrows += qdisc_pkt_len(skb);
 868				cl->xstats.borrows += qdisc_pkt_len(skb);
 869#endif
 870			}
 871			q->tx_len = qdisc_pkt_len(skb);
 872
 873			if (cl->deficit <= 0) {
 874				q->active[prio] = cl;
 875				cl = cl->next_alive;
 876				cl->deficit += cl->quantum;
 877			}
 878			return skb;
 879
 880skip_class:
 881			if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 882				/* Class is empty or penalized.
 883				 * Unlink it from active chain.
 884				 */
 885				cl_prev->next_alive = cl->next_alive;
 886				cl->next_alive = NULL;
 887
 888				/* Did cl_tail point to it? */
 889				if (cl == cl_tail) {
 890					/* Repair it! */
 891					cl_tail = cl_prev;
 892
 893					/* Was it the last class in this band? */
 894					if (cl == cl_tail) {
 895						/* Kill the band! */
 896						q->active[prio] = NULL;
 897						q->activemask &= ~(1<<prio);
 898						if (cl->q->q.qlen)
 899							cbq_activate_class(cl);
 900						return NULL;
 901					}
 902
 903					q->active[prio] = cl_tail;
 904				}
 905				if (cl->q->q.qlen)
 906					cbq_activate_class(cl);
 907
 908				cl = cl_prev;
 909			}
 910
 911next_class:
 912			cl_prev = cl;
 913			cl = cl->next_alive;
 914		} while (cl_prev != cl_tail);
 915	} while (deficit);
 916
 917	q->active[prio] = cl_prev;
 918
 919	return NULL;
 920}
 921
 922static inline struct sk_buff *
 923cbq_dequeue_1(struct Qdisc *sch)
 924{
 925	struct cbq_sched_data *q = qdisc_priv(sch);
 926	struct sk_buff *skb;
 927	unsigned int activemask;
 928
 929	activemask = q->activemask & 0xFF;
 930	while (activemask) {
 931		int prio = ffz(~activemask);
 932		activemask &= ~(1<<prio);
 933		skb = cbq_dequeue_prio(sch, prio);
 934		if (skb)
 935			return skb;
 936	}
 937	return NULL;
 938}
 939
 940static struct sk_buff *
 941cbq_dequeue(struct Qdisc *sch)
 942{
 943	struct sk_buff *skb;
 944	struct cbq_sched_data *q = qdisc_priv(sch);
 945	psched_time_t now;
 946	psched_tdiff_t incr;
 947
 948	now = psched_get_time();
 949	incr = now - q->now_rt;
 950
 951	if (q->tx_class) {
 952		psched_tdiff_t incr2;
 953		/* Time integrator. We calculate EOS time
 954		 * by adding expected packet transmission time.
 955		 * If real time is greater, we warp artificial clock,
 956		 * so that:
 957		 *
 958		 * cbq_time = max(real_time, work);
 959		 */
 960		incr2 = L2T(&q->link, q->tx_len);
 961		q->now += incr2;
 962		cbq_update(q);
 963		if ((incr -= incr2) < 0)
 964			incr = 0;
 
 
 
 
 965	}
 966	q->now += incr;
 967	q->now_rt = now;
 968
 969	for (;;) {
 970		q->wd_expires = 0;
 971
 972		skb = cbq_dequeue_1(sch);
 973		if (skb) {
 974			qdisc_bstats_update(sch, skb);
 975			sch->q.qlen--;
 976			qdisc_unthrottled(sch);
 977			return skb;
 978		}
 979
 980		/* All the classes are overlimit.
 981		 *
 982		 * It is possible, if:
 983		 *
 984		 * 1. Scheduler is empty.
 985		 * 2. Toplevel cutoff inhibited borrowing.
 986		 * 3. Root class is overlimit.
 987		 *
 988		 * Reset 2d and 3d conditions and retry.
 989		 *
 990		 * Note, that NS and cbq-2.0 are buggy, peeking
 991		 * an arbitrary class is appropriate for ancestor-only
 992		 * sharing, but not for toplevel algorithm.
 993		 *
 994		 * Our version is better, but slower, because it requires
 995		 * two passes, but it is unavoidable with top-level sharing.
 996		 */
 997
 998		if (q->toplevel == TC_CBQ_MAXLEVEL &&
 999		    q->link.undertime == PSCHED_PASTPERFECT)
1000			break;
1001
1002		q->toplevel = TC_CBQ_MAXLEVEL;
1003		q->link.undertime = PSCHED_PASTPERFECT;
1004	}
1005
1006	/* No packets in scheduler or nobody wants to give them to us :-(
1007	 * Sigh... start watchdog timer in the last case.
1008	 */
1009
1010	if (sch->q.qlen) {
1011		sch->qstats.overlimits++;
1012		if (q->wd_expires)
1013			qdisc_watchdog_schedule(&q->watchdog,
1014						now + q->wd_expires);
1015	}
1016	return NULL;
1017}
1018
1019/* CBQ class maintanance routines */
1020
1021static void cbq_adjust_levels(struct cbq_class *this)
1022{
1023	if (this == NULL)
1024		return;
1025
1026	do {
1027		int level = 0;
1028		struct cbq_class *cl;
1029
1030		cl = this->children;
1031		if (cl) {
1032			do {
1033				if (cl->level > level)
1034					level = cl->level;
1035			} while ((cl = cl->sibling) != this->children);
1036		}
1037		this->level = level + 1;
1038	} while ((this = this->tparent) != NULL);
1039}
1040
1041static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1042{
1043	struct cbq_class *cl;
1044	struct hlist_node *n;
1045	unsigned int h;
1046
1047	if (q->quanta[prio] == 0)
1048		return;
1049
1050	for (h = 0; h < q->clhash.hashsize; h++) {
1051		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1052			/* BUGGGG... Beware! This expression suffer of
1053			 * arithmetic overflows!
1054			 */
1055			if (cl->priority == prio) {
1056				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1057					q->quanta[prio];
1058			}
1059			if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1060				pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1061					   cl->common.classid, cl->quantum);
 
1062				cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1063			}
1064		}
1065	}
1066}
1067
1068static void cbq_sync_defmap(struct cbq_class *cl)
1069{
1070	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1071	struct cbq_class *split = cl->split;
1072	unsigned int h;
1073	int i;
1074
1075	if (split == NULL)
1076		return;
1077
1078	for (i = 0; i <= TC_PRIO_MAX; i++) {
1079		if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1080			split->defaults[i] = NULL;
1081	}
1082
1083	for (i = 0; i <= TC_PRIO_MAX; i++) {
1084		int level = split->level;
1085
1086		if (split->defaults[i])
1087			continue;
1088
1089		for (h = 0; h < q->clhash.hashsize; h++) {
1090			struct hlist_node *n;
1091			struct cbq_class *c;
1092
1093			hlist_for_each_entry(c, n, &q->clhash.hash[h],
1094					     common.hnode) {
1095				if (c->split == split && c->level < level &&
1096				    c->defmap & (1<<i)) {
1097					split->defaults[i] = c;
1098					level = c->level;
1099				}
1100			}
1101		}
1102	}
1103}
1104
1105static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1106{
1107	struct cbq_class *split = NULL;
1108
1109	if (splitid == 0) {
1110		split = cl->split;
1111		if (!split)
1112			return;
1113		splitid = split->common.classid;
1114	}
1115
1116	if (split == NULL || split->common.classid != splitid) {
1117		for (split = cl->tparent; split; split = split->tparent)
1118			if (split->common.classid == splitid)
1119				break;
1120	}
1121
1122	if (split == NULL)
1123		return;
1124
1125	if (cl->split != split) {
1126		cl->defmap = 0;
1127		cbq_sync_defmap(cl);
1128		cl->split = split;
1129		cl->defmap = def & mask;
1130	} else
1131		cl->defmap = (cl->defmap & ~mask) | (def & mask);
1132
1133	cbq_sync_defmap(cl);
1134}
1135
1136static void cbq_unlink_class(struct cbq_class *this)
1137{
1138	struct cbq_class *cl, **clp;
1139	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1140
1141	qdisc_class_hash_remove(&q->clhash, &this->common);
1142
1143	if (this->tparent) {
1144		clp = &this->sibling;
1145		cl = *clp;
1146		do {
1147			if (cl == this) {
1148				*clp = cl->sibling;
1149				break;
1150			}
1151			clp = &cl->sibling;
1152		} while ((cl = *clp) != this->sibling);
1153
1154		if (this->tparent->children == this) {
1155			this->tparent->children = this->sibling;
1156			if (this->sibling == this)
1157				this->tparent->children = NULL;
1158		}
1159	} else {
1160		WARN_ON(this->sibling != this);
1161	}
1162}
1163
1164static void cbq_link_class(struct cbq_class *this)
1165{
1166	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1167	struct cbq_class *parent = this->tparent;
1168
1169	this->sibling = this;
1170	qdisc_class_hash_insert(&q->clhash, &this->common);
1171
1172	if (parent == NULL)
1173		return;
1174
1175	if (parent->children == NULL) {
1176		parent->children = this;
1177	} else {
1178		this->sibling = parent->children->sibling;
1179		parent->children->sibling = this;
1180	}
1181}
1182
1183static unsigned int cbq_drop(struct Qdisc *sch)
1184{
1185	struct cbq_sched_data *q = qdisc_priv(sch);
1186	struct cbq_class *cl, *cl_head;
1187	int prio;
1188	unsigned int len;
1189
1190	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1191		cl_head = q->active[prio];
1192		if (!cl_head)
1193			continue;
1194
1195		cl = cl_head;
1196		do {
1197			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1198				sch->q.qlen--;
1199				if (!cl->q->q.qlen)
1200					cbq_deactivate_class(cl);
1201				return len;
1202			}
1203		} while ((cl = cl->next_alive) != cl_head);
1204	}
1205	return 0;
1206}
1207
1208static void
1209cbq_reset(struct Qdisc *sch)
1210{
1211	struct cbq_sched_data *q = qdisc_priv(sch);
1212	struct cbq_class *cl;
1213	struct hlist_node *n;
1214	int prio;
1215	unsigned int h;
1216
1217	q->activemask = 0;
1218	q->pmask = 0;
1219	q->tx_class = NULL;
1220	q->tx_borrowed = NULL;
1221	qdisc_watchdog_cancel(&q->watchdog);
1222	hrtimer_cancel(&q->delay_timer);
1223	q->toplevel = TC_CBQ_MAXLEVEL;
1224	q->now = psched_get_time();
1225	q->now_rt = q->now;
1226
1227	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1228		q->active[prio] = NULL;
1229
1230	for (h = 0; h < q->clhash.hashsize; h++) {
1231		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1232			qdisc_reset(cl->q);
1233
1234			cl->next_alive = NULL;
1235			cl->undertime = PSCHED_PASTPERFECT;
1236			cl->avgidle = cl->maxidle;
1237			cl->deficit = cl->quantum;
1238			cl->cpriority = cl->priority;
1239		}
1240	}
1241	sch->q.qlen = 0;
1242}
1243
1244
1245static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1246{
1247	if (lss->change & TCF_CBQ_LSS_FLAGS) {
1248		cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1249		cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1250	}
1251	if (lss->change & TCF_CBQ_LSS_EWMA)
1252		cl->ewma_log = lss->ewma_log;
1253	if (lss->change & TCF_CBQ_LSS_AVPKT)
1254		cl->avpkt = lss->avpkt;
1255	if (lss->change & TCF_CBQ_LSS_MINIDLE)
1256		cl->minidle = -(long)lss->minidle;
1257	if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1258		cl->maxidle = lss->maxidle;
1259		cl->avgidle = lss->maxidle;
1260	}
1261	if (lss->change & TCF_CBQ_LSS_OFFTIME)
1262		cl->offtime = lss->offtime;
1263	return 0;
1264}
1265
1266static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1267{
1268	q->nclasses[cl->priority]--;
1269	q->quanta[cl->priority] -= cl->weight;
1270	cbq_normalize_quanta(q, cl->priority);
1271}
1272
1273static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1274{
1275	q->nclasses[cl->priority]++;
1276	q->quanta[cl->priority] += cl->weight;
1277	cbq_normalize_quanta(q, cl->priority);
1278}
1279
1280static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1281{
1282	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1283
1284	if (wrr->allot)
1285		cl->allot = wrr->allot;
1286	if (wrr->weight)
1287		cl->weight = wrr->weight;
1288	if (wrr->priority) {
1289		cl->priority = wrr->priority - 1;
1290		cl->cpriority = cl->priority;
1291		if (cl->priority >= cl->priority2)
1292			cl->priority2 = TC_CBQ_MAXPRIO - 1;
1293	}
1294
1295	cbq_addprio(q, cl);
1296	return 0;
1297}
1298
1299static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1300{
1301	switch (ovl->strategy) {
1302	case TC_CBQ_OVL_CLASSIC:
1303		cl->overlimit = cbq_ovl_classic;
1304		break;
1305	case TC_CBQ_OVL_DELAY:
1306		cl->overlimit = cbq_ovl_delay;
1307		break;
1308	case TC_CBQ_OVL_LOWPRIO:
1309		if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1310		    ovl->priority2 - 1 <= cl->priority)
1311			return -EINVAL;
1312		cl->priority2 = ovl->priority2 - 1;
1313		cl->overlimit = cbq_ovl_lowprio;
1314		break;
1315	case TC_CBQ_OVL_DROP:
1316		cl->overlimit = cbq_ovl_drop;
1317		break;
1318	case TC_CBQ_OVL_RCLASSIC:
1319		cl->overlimit = cbq_ovl_rclassic;
1320		break;
1321	default:
1322		return -EINVAL;
1323	}
1324	cl->penalty = ovl->penalty;
1325	return 0;
1326}
1327
1328#ifdef CONFIG_NET_CLS_ACT
1329static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1330{
1331	cl->police = p->police;
1332
1333	if (cl->q->handle) {
1334		if (p->police == TC_POLICE_RECLASSIFY)
1335			cl->q->reshape_fail = cbq_reshape_fail;
1336		else
1337			cl->q->reshape_fail = NULL;
1338	}
1339	return 0;
1340}
1341#endif
1342
1343static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1344{
1345	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1346	return 0;
1347}
1348
1349static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1350	[TCA_CBQ_LSSOPT]	= { .len = sizeof(struct tc_cbq_lssopt) },
1351	[TCA_CBQ_WRROPT]	= { .len = sizeof(struct tc_cbq_wrropt) },
1352	[TCA_CBQ_FOPT]		= { .len = sizeof(struct tc_cbq_fopt) },
1353	[TCA_CBQ_OVL_STRATEGY]	= { .len = sizeof(struct tc_cbq_ovl) },
1354	[TCA_CBQ_RATE]		= { .len = sizeof(struct tc_ratespec) },
1355	[TCA_CBQ_RTAB]		= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1356	[TCA_CBQ_POLICE]	= { .len = sizeof(struct tc_cbq_police) },
1357};
1358
1359static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1360{
1361	struct cbq_sched_data *q = qdisc_priv(sch);
1362	struct nlattr *tb[TCA_CBQ_MAX + 1];
1363	struct tc_ratespec *r;
1364	int err;
1365
1366	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1367	if (err < 0)
1368		return err;
1369
1370	if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1371		return -EINVAL;
1372
1373	r = nla_data(tb[TCA_CBQ_RATE]);
1374
1375	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1376		return -EINVAL;
1377
1378	err = qdisc_class_hash_init(&q->clhash);
1379	if (err < 0)
1380		goto put_rtab;
1381
1382	q->link.refcnt = 1;
1383	q->link.sibling = &q->link;
1384	q->link.common.classid = sch->handle;
1385	q->link.qdisc = sch;
1386	q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1387				      sch->handle);
1388	if (!q->link.q)
1389		q->link.q = &noop_qdisc;
1390
1391	q->link.priority = TC_CBQ_MAXPRIO - 1;
1392	q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1393	q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1394	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1395	q->link.overlimit = cbq_ovl_classic;
1396	q->link.allot = psched_mtu(qdisc_dev(sch));
1397	q->link.quantum = q->link.allot;
1398	q->link.weight = q->link.R_tab->rate.rate;
1399
1400	q->link.ewma_log = TC_CBQ_DEF_EWMA;
1401	q->link.avpkt = q->link.allot/2;
1402	q->link.minidle = -0x7FFFFFFF;
1403
1404	qdisc_watchdog_init(&q->watchdog, sch);
1405	hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1406	q->delay_timer.function = cbq_undelay;
1407	q->toplevel = TC_CBQ_MAXLEVEL;
1408	q->now = psched_get_time();
1409	q->now_rt = q->now;
1410
1411	cbq_link_class(&q->link);
1412
1413	if (tb[TCA_CBQ_LSSOPT])
1414		cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1415
1416	cbq_addprio(q, &q->link);
1417	return 0;
1418
1419put_rtab:
1420	qdisc_put_rtab(q->link.R_tab);
1421	return err;
1422}
1423
1424static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1425{
1426	unsigned char *b = skb_tail_pointer(skb);
1427
1428	NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
 
1429	return skb->len;
1430
1431nla_put_failure:
1432	nlmsg_trim(skb, b);
1433	return -1;
1434}
1435
1436static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1437{
1438	unsigned char *b = skb_tail_pointer(skb);
1439	struct tc_cbq_lssopt opt;
1440
1441	opt.flags = 0;
1442	if (cl->borrow == NULL)
1443		opt.flags |= TCF_CBQ_LSS_BOUNDED;
1444	if (cl->share == NULL)
1445		opt.flags |= TCF_CBQ_LSS_ISOLATED;
1446	opt.ewma_log = cl->ewma_log;
1447	opt.level = cl->level;
1448	opt.avpkt = cl->avpkt;
1449	opt.maxidle = cl->maxidle;
1450	opt.minidle = (u32)(-cl->minidle);
1451	opt.offtime = cl->offtime;
1452	opt.change = ~0;
1453	NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
 
1454	return skb->len;
1455
1456nla_put_failure:
1457	nlmsg_trim(skb, b);
1458	return -1;
1459}
1460
1461static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1462{
1463	unsigned char *b = skb_tail_pointer(skb);
1464	struct tc_cbq_wrropt opt;
1465
 
1466	opt.flags = 0;
1467	opt.allot = cl->allot;
1468	opt.priority = cl->priority + 1;
1469	opt.cpriority = cl->cpriority + 1;
1470	opt.weight = cl->weight;
1471	NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
 
1472	return skb->len;
1473
1474nla_put_failure:
1475	nlmsg_trim(skb, b);
1476	return -1;
1477}
1478
1479static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1480{
1481	unsigned char *b = skb_tail_pointer(skb);
1482	struct tc_cbq_ovl opt;
1483
1484	opt.strategy = cl->ovl_strategy;
1485	opt.priority2 = cl->priority2 + 1;
1486	opt.pad = 0;
1487	opt.penalty = cl->penalty;
1488	NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
 
1489	return skb->len;
1490
1491nla_put_failure:
1492	nlmsg_trim(skb, b);
1493	return -1;
1494}
1495
1496static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1497{
1498	unsigned char *b = skb_tail_pointer(skb);
1499	struct tc_cbq_fopt opt;
1500
1501	if (cl->split || cl->defmap) {
1502		opt.split = cl->split ? cl->split->common.classid : 0;
1503		opt.defmap = cl->defmap;
1504		opt.defchange = ~0;
1505		NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
 
1506	}
1507	return skb->len;
1508
1509nla_put_failure:
1510	nlmsg_trim(skb, b);
1511	return -1;
1512}
1513
1514#ifdef CONFIG_NET_CLS_ACT
1515static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1516{
1517	unsigned char *b = skb_tail_pointer(skb);
1518	struct tc_cbq_police opt;
1519
1520	if (cl->police) {
1521		opt.police = cl->police;
1522		opt.__res1 = 0;
1523		opt.__res2 = 0;
1524		NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
 
1525	}
1526	return skb->len;
1527
1528nla_put_failure:
1529	nlmsg_trim(skb, b);
1530	return -1;
1531}
1532#endif
1533
1534static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1535{
1536	if (cbq_dump_lss(skb, cl) < 0 ||
1537	    cbq_dump_rate(skb, cl) < 0 ||
1538	    cbq_dump_wrr(skb, cl) < 0 ||
1539	    cbq_dump_ovl(skb, cl) < 0 ||
1540#ifdef CONFIG_NET_CLS_ACT
1541	    cbq_dump_police(skb, cl) < 0 ||
1542#endif
1543	    cbq_dump_fopt(skb, cl) < 0)
1544		return -1;
1545	return 0;
1546}
1547
1548static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1549{
1550	struct cbq_sched_data *q = qdisc_priv(sch);
1551	struct nlattr *nest;
1552
1553	nest = nla_nest_start(skb, TCA_OPTIONS);
1554	if (nest == NULL)
1555		goto nla_put_failure;
1556	if (cbq_dump_attr(skb, &q->link) < 0)
1557		goto nla_put_failure;
1558	nla_nest_end(skb, nest);
1559	return skb->len;
1560
1561nla_put_failure:
1562	nla_nest_cancel(skb, nest);
1563	return -1;
1564}
1565
1566static int
1567cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1568{
1569	struct cbq_sched_data *q = qdisc_priv(sch);
1570
1571	q->link.xstats.avgidle = q->link.avgidle;
1572	return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1573}
1574
1575static int
1576cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1577	       struct sk_buff *skb, struct tcmsg *tcm)
1578{
1579	struct cbq_class *cl = (struct cbq_class *)arg;
1580	struct nlattr *nest;
1581
1582	if (cl->tparent)
1583		tcm->tcm_parent = cl->tparent->common.classid;
1584	else
1585		tcm->tcm_parent = TC_H_ROOT;
1586	tcm->tcm_handle = cl->common.classid;
1587	tcm->tcm_info = cl->q->handle;
1588
1589	nest = nla_nest_start(skb, TCA_OPTIONS);
1590	if (nest == NULL)
1591		goto nla_put_failure;
1592	if (cbq_dump_attr(skb, cl) < 0)
1593		goto nla_put_failure;
1594	nla_nest_end(skb, nest);
1595	return skb->len;
1596
1597nla_put_failure:
1598	nla_nest_cancel(skb, nest);
1599	return -1;
1600}
1601
1602static int
1603cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1604	struct gnet_dump *d)
1605{
1606	struct cbq_sched_data *q = qdisc_priv(sch);
1607	struct cbq_class *cl = (struct cbq_class *)arg;
1608
1609	cl->qstats.qlen = cl->q->q.qlen;
1610	cl->xstats.avgidle = cl->avgidle;
1611	cl->xstats.undertime = 0;
1612
1613	if (cl->undertime != PSCHED_PASTPERFECT)
1614		cl->xstats.undertime = cl->undertime - q->now;
1615
1616	if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1617	    gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1618	    gnet_stats_copy_queue(d, &cl->qstats) < 0)
1619		return -1;
1620
1621	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1622}
1623
1624static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1625		     struct Qdisc **old)
1626{
1627	struct cbq_class *cl = (struct cbq_class *)arg;
1628
1629	if (new == NULL) {
1630		new = qdisc_create_dflt(sch->dev_queue,
1631					&pfifo_qdisc_ops, cl->common.classid);
1632		if (new == NULL)
1633			return -ENOBUFS;
1634	} else {
1635#ifdef CONFIG_NET_CLS_ACT
1636		if (cl->police == TC_POLICE_RECLASSIFY)
1637			new->reshape_fail = cbq_reshape_fail;
1638#endif
1639	}
1640	sch_tree_lock(sch);
1641	*old = cl->q;
1642	cl->q = new;
1643	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1644	qdisc_reset(*old);
1645	sch_tree_unlock(sch);
1646
1647	return 0;
1648}
1649
1650static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1651{
1652	struct cbq_class *cl = (struct cbq_class *)arg;
1653
1654	return cl->q;
1655}
1656
1657static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1658{
1659	struct cbq_class *cl = (struct cbq_class *)arg;
1660
1661	if (cl->q->q.qlen == 0)
1662		cbq_deactivate_class(cl);
1663}
1664
1665static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1666{
1667	struct cbq_sched_data *q = qdisc_priv(sch);
1668	struct cbq_class *cl = cbq_class_lookup(q, classid);
1669
1670	if (cl) {
1671		cl->refcnt++;
1672		return (unsigned long)cl;
1673	}
1674	return 0;
1675}
1676
1677static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1678{
1679	struct cbq_sched_data *q = qdisc_priv(sch);
1680
1681	WARN_ON(cl->filters);
1682
1683	tcf_destroy_chain(&cl->filter_list);
1684	qdisc_destroy(cl->q);
1685	qdisc_put_rtab(cl->R_tab);
1686	gen_kill_estimator(&cl->bstats, &cl->rate_est);
1687	if (cl != &q->link)
1688		kfree(cl);
1689}
1690
1691static void cbq_destroy(struct Qdisc *sch)
1692{
1693	struct cbq_sched_data *q = qdisc_priv(sch);
1694	struct hlist_node *n, *next;
1695	struct cbq_class *cl;
1696	unsigned int h;
1697
1698#ifdef CONFIG_NET_CLS_ACT
1699	q->rx_class = NULL;
1700#endif
1701	/*
1702	 * Filters must be destroyed first because we don't destroy the
1703	 * classes from root to leafs which means that filters can still
1704	 * be bound to classes which have been destroyed already. --TGR '04
1705	 */
1706	for (h = 0; h < q->clhash.hashsize; h++) {
1707		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1708			tcf_destroy_chain(&cl->filter_list);
1709	}
1710	for (h = 0; h < q->clhash.hashsize; h++) {
1711		hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1712					  common.hnode)
1713			cbq_destroy_class(sch, cl);
1714	}
1715	qdisc_class_hash_destroy(&q->clhash);
1716}
1717
1718static void cbq_put(struct Qdisc *sch, unsigned long arg)
1719{
1720	struct cbq_class *cl = (struct cbq_class *)arg;
1721
1722	if (--cl->refcnt == 0) {
1723#ifdef CONFIG_NET_CLS_ACT
1724		spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1725		struct cbq_sched_data *q = qdisc_priv(sch);
1726
1727		spin_lock_bh(root_lock);
1728		if (q->rx_class == cl)
1729			q->rx_class = NULL;
1730		spin_unlock_bh(root_lock);
1731#endif
1732
1733		cbq_destroy_class(sch, cl);
1734	}
1735}
1736
1737static int
1738cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1739		 unsigned long *arg)
1740{
1741	int err;
1742	struct cbq_sched_data *q = qdisc_priv(sch);
1743	struct cbq_class *cl = (struct cbq_class *)*arg;
1744	struct nlattr *opt = tca[TCA_OPTIONS];
1745	struct nlattr *tb[TCA_CBQ_MAX + 1];
1746	struct cbq_class *parent;
1747	struct qdisc_rate_table *rtab = NULL;
1748
1749	if (opt == NULL)
1750		return -EINVAL;
1751
1752	err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1753	if (err < 0)
1754		return err;
1755
1756	if (cl) {
1757		/* Check parent */
1758		if (parentid) {
1759			if (cl->tparent &&
1760			    cl->tparent->common.classid != parentid)
1761				return -EINVAL;
1762			if (!cl->tparent && parentid != TC_H_ROOT)
1763				return -EINVAL;
1764		}
1765
1766		if (tb[TCA_CBQ_RATE]) {
1767			rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1768					      tb[TCA_CBQ_RTAB]);
1769			if (rtab == NULL)
1770				return -EINVAL;
1771		}
1772
1773		if (tca[TCA_RATE]) {
1774			err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1775						    qdisc_root_sleeping_lock(sch),
1776						    tca[TCA_RATE]);
1777			if (err) {
1778				if (rtab)
1779					qdisc_put_rtab(rtab);
1780				return err;
1781			}
1782		}
1783
1784		/* Change class parameters */
1785		sch_tree_lock(sch);
1786
1787		if (cl->next_alive != NULL)
1788			cbq_deactivate_class(cl);
1789
1790		if (rtab) {
1791			qdisc_put_rtab(cl->R_tab);
1792			cl->R_tab = rtab;
1793		}
1794
1795		if (tb[TCA_CBQ_LSSOPT])
1796			cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1797
1798		if (tb[TCA_CBQ_WRROPT]) {
1799			cbq_rmprio(q, cl);
1800			cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1801		}
1802
1803		if (tb[TCA_CBQ_OVL_STRATEGY])
1804			cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1805
1806#ifdef CONFIG_NET_CLS_ACT
1807		if (tb[TCA_CBQ_POLICE])
1808			cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1809#endif
1810
1811		if (tb[TCA_CBQ_FOPT])
1812			cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1813
1814		if (cl->q->q.qlen)
1815			cbq_activate_class(cl);
1816
1817		sch_tree_unlock(sch);
1818
1819		return 0;
1820	}
1821
1822	if (parentid == TC_H_ROOT)
1823		return -EINVAL;
1824
1825	if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1826	    tb[TCA_CBQ_LSSOPT] == NULL)
1827		return -EINVAL;
1828
1829	rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1830	if (rtab == NULL)
1831		return -EINVAL;
1832
1833	if (classid) {
1834		err = -EINVAL;
1835		if (TC_H_MAJ(classid ^ sch->handle) ||
1836		    cbq_class_lookup(q, classid))
1837			goto failure;
1838	} else {
1839		int i;
1840		classid = TC_H_MAKE(sch->handle, 0x8000);
1841
1842		for (i = 0; i < 0x8000; i++) {
1843			if (++q->hgenerator >= 0x8000)
1844				q->hgenerator = 1;
1845			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1846				break;
1847		}
1848		err = -ENOSR;
1849		if (i >= 0x8000)
1850			goto failure;
1851		classid = classid|q->hgenerator;
1852	}
1853
1854	parent = &q->link;
1855	if (parentid) {
1856		parent = cbq_class_lookup(q, parentid);
1857		err = -EINVAL;
1858		if (parent == NULL)
1859			goto failure;
1860	}
1861
1862	err = -ENOBUFS;
1863	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1864	if (cl == NULL)
1865		goto failure;
1866
1867	if (tca[TCA_RATE]) {
1868		err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1869					qdisc_root_sleeping_lock(sch),
1870					tca[TCA_RATE]);
1871		if (err) {
1872			kfree(cl);
1873			goto failure;
1874		}
1875	}
1876
1877	cl->R_tab = rtab;
1878	rtab = NULL;
1879	cl->refcnt = 1;
1880	cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1881	if (!cl->q)
1882		cl->q = &noop_qdisc;
1883	cl->common.classid = classid;
1884	cl->tparent = parent;
1885	cl->qdisc = sch;
1886	cl->allot = parent->allot;
1887	cl->quantum = cl->allot;
1888	cl->weight = cl->R_tab->rate.rate;
1889
1890	sch_tree_lock(sch);
1891	cbq_link_class(cl);
1892	cl->borrow = cl->tparent;
1893	if (cl->tparent != &q->link)
1894		cl->share = cl->tparent;
1895	cbq_adjust_levels(parent);
1896	cl->minidle = -0x7FFFFFFF;
1897	cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1898	cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1899	if (cl->ewma_log == 0)
1900		cl->ewma_log = q->link.ewma_log;
1901	if (cl->maxidle == 0)
1902		cl->maxidle = q->link.maxidle;
1903	if (cl->avpkt == 0)
1904		cl->avpkt = q->link.avpkt;
1905	cl->overlimit = cbq_ovl_classic;
1906	if (tb[TCA_CBQ_OVL_STRATEGY])
1907		cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1908#ifdef CONFIG_NET_CLS_ACT
1909	if (tb[TCA_CBQ_POLICE])
1910		cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911#endif
1912	if (tb[TCA_CBQ_FOPT])
1913		cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1914	sch_tree_unlock(sch);
1915
1916	qdisc_class_hash_grow(sch, &q->clhash);
1917
1918	*arg = (unsigned long)cl;
1919	return 0;
1920
1921failure:
1922	qdisc_put_rtab(rtab);
1923	return err;
1924}
1925
1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1927{
1928	struct cbq_sched_data *q = qdisc_priv(sch);
1929	struct cbq_class *cl = (struct cbq_class *)arg;
1930	unsigned int qlen;
1931
1932	if (cl->filters || cl->children || cl == &q->link)
1933		return -EBUSY;
1934
1935	sch_tree_lock(sch);
1936
1937	qlen = cl->q->q.qlen;
1938	qdisc_reset(cl->q);
1939	qdisc_tree_decrease_qlen(cl->q, qlen);
1940
1941	if (cl->next_alive)
1942		cbq_deactivate_class(cl);
1943
1944	if (q->tx_borrowed == cl)
1945		q->tx_borrowed = q->tx_class;
1946	if (q->tx_class == cl) {
1947		q->tx_class = NULL;
1948		q->tx_borrowed = NULL;
1949	}
1950#ifdef CONFIG_NET_CLS_ACT
1951	if (q->rx_class == cl)
1952		q->rx_class = NULL;
1953#endif
1954
1955	cbq_unlink_class(cl);
1956	cbq_adjust_levels(cl->tparent);
1957	cl->defmap = 0;
1958	cbq_sync_defmap(cl);
1959
1960	cbq_rmprio(q, cl);
1961	sch_tree_unlock(sch);
1962
1963	BUG_ON(--cl->refcnt == 0);
1964	/*
1965	 * This shouldn't happen: we "hold" one cops->get() when called
1966	 * from tc_ctl_tclass; the destroy method is done from cops->put().
1967	 */
1968
1969	return 0;
1970}
1971
1972static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1973{
1974	struct cbq_sched_data *q = qdisc_priv(sch);
1975	struct cbq_class *cl = (struct cbq_class *)arg;
1976
1977	if (cl == NULL)
1978		cl = &q->link;
1979
1980	return &cl->filter_list;
1981}
1982
1983static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1984				     u32 classid)
1985{
1986	struct cbq_sched_data *q = qdisc_priv(sch);
1987	struct cbq_class *p = (struct cbq_class *)parent;
1988	struct cbq_class *cl = cbq_class_lookup(q, classid);
1989
1990	if (cl) {
1991		if (p && p->level <= cl->level)
1992			return 0;
1993		cl->filters++;
1994		return (unsigned long)cl;
1995	}
1996	return 0;
1997}
1998
1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2000{
2001	struct cbq_class *cl = (struct cbq_class *)arg;
2002
2003	cl->filters--;
2004}
2005
2006static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2007{
2008	struct cbq_sched_data *q = qdisc_priv(sch);
2009	struct cbq_class *cl;
2010	struct hlist_node *n;
2011	unsigned int h;
2012
2013	if (arg->stop)
2014		return;
2015
2016	for (h = 0; h < q->clhash.hashsize; h++) {
2017		hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018			if (arg->count < arg->skip) {
2019				arg->count++;
2020				continue;
2021			}
2022			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2023				arg->stop = 1;
2024				return;
2025			}
2026			arg->count++;
2027		}
2028	}
2029}
2030
2031static const struct Qdisc_class_ops cbq_class_ops = {
2032	.graft		=	cbq_graft,
2033	.leaf		=	cbq_leaf,
2034	.qlen_notify	=	cbq_qlen_notify,
2035	.get		=	cbq_get,
2036	.put		=	cbq_put,
2037	.change		=	cbq_change_class,
2038	.delete		=	cbq_delete,
2039	.walk		=	cbq_walk,
2040	.tcf_chain	=	cbq_find_tcf,
2041	.bind_tcf	=	cbq_bind_filter,
2042	.unbind_tcf	=	cbq_unbind_filter,
2043	.dump		=	cbq_dump_class,
2044	.dump_stats	=	cbq_dump_class_stats,
2045};
2046
2047static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2048	.next		=	NULL,
2049	.cl_ops		=	&cbq_class_ops,
2050	.id		=	"cbq",
2051	.priv_size	=	sizeof(struct cbq_sched_data),
2052	.enqueue	=	cbq_enqueue,
2053	.dequeue	=	cbq_dequeue,
2054	.peek		=	qdisc_peek_dequeued,
2055	.drop		=	cbq_drop,
2056	.init		=	cbq_init,
2057	.reset		=	cbq_reset,
2058	.destroy	=	cbq_destroy,
2059	.change		=	NULL,
2060	.dump		=	cbq_dump,
2061	.dump_stats	=	cbq_dump_stats,
2062	.owner		=	THIS_MODULE,
2063};
2064
2065static int __init cbq_module_init(void)
2066{
2067	return register_qdisc(&cbq_qdisc_ops);
2068}
2069static void __exit cbq_module_exit(void)
2070{
2071	unregister_qdisc(&cbq_qdisc_ops);
2072}
2073module_init(cbq_module_init)
2074module_exit(cbq_module_exit)
2075MODULE_LICENSE("GPL");