v5.4
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
   4 *
   5 * Authors:	Martin Devera, <devik@cdi.cz>
   6 *
   7 * Credits (in time order) for older HTB versions:
   8 *              Stef Coene <stef.coene@docum.org>
   9 *			HTB support at LARTC mailing list
  10 *		Ondrej Kraus, <krauso@barr.cz>
  11 *			found missing INIT_QDISC(htb)
  12 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  13 *			helped a lot to locate nasty class stall bug
  14 *		Andi Kleen, Jamal Hadi, Bert Hubert
  15 *			code review and helpful comments on shaping
  16 *		Tomasz Wrona, <tw@eter.tym.pl>
  17 *			created test case so that I was able to fix nasty bug
  18 *		Wilfried Weissmann
  19 *			spotted bug in dequeue code and helped with fix
  20 *		Jiri Fojtasek
  21 *			fixed requeue routine
  22 *		and many others. thanks.
  23 */
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/types.h>
  27#include <linux/kernel.h>
  28#include <linux/string.h>
  29#include <linux/errno.h>
  30#include <linux/skbuff.h>
  31#include <linux/list.h>
  32#include <linux/compiler.h>
  33#include <linux/rbtree.h>
  34#include <linux/workqueue.h>
  35#include <linux/slab.h>
  36#include <net/netlink.h>
  37#include <net/sch_generic.h>
  38#include <net/pkt_sched.h>
  39#include <net/pkt_cls.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
   45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
   49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
   50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
   51    one less than their parent.
  52*/
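/*
 * Editorial sketch, not part of the original source: with TC_HTB_MAXDEPTH == 8
 * (its value in pkt_sched.h), a small hierarchy built with iproute2 might be
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 60mbit ceil 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 40mbit ceil 100mbit
 *
 * Here 1:10 and 1:20 are leaves (level 0), while 1:1, once it has children,
 * becomes an interior node at level TC_HTB_MAXDEPTH - 1 == 7 (see
 * htb_change_class() below). "eth0" and the rates are placeholders.
 */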
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
   55#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  64
  65static int htb_rate_est = 0; /* htb classes have a default rate estimator */
  66module_param(htb_rate_est, int, 0640);
  67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
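/*
 * Editorial note, not part of the original source: both knobs above are plain
 * module parameters, so when HTB is built as a module (sch_htb.ko) they can be
 * set at load time and read back through sysfs, for example
 *
 *	modprobe sch_htb htb_hysteresis=1 htb_rate_est=1
 *	cat /sys/module/sch_htb/parameters/htb_hysteresis
 *
 * The 0640 above is the sysfs permission mask; the path assumes the default
 * sysfs mount point.
 */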
  68
   69/* used internally to keep the status of a single class */
  70enum htb_cmode {
  71	HTB_CANT_SEND,		/* class can't send and can't borrow */
  72	HTB_MAY_BORROW,		/* class can't send but may borrow */
  73	HTB_CAN_SEND		/* class can send */
  74};
  75
  76struct htb_prio {
  77	union {
  78		struct rb_root	row;
  79		struct rb_root	feed;
  80	};
  81	struct rb_node	*ptr;
  82	/* When class changes from state 1->2 and disconnects from
  83	 * parent's feed then we lost ptr value and start from the
  84	 * first child again. Here we store classid of the
  85	 * last valid ptr (used when ptr is NULL).
  86	 */
  87	u32		last_ptr_id;
  88};
  89
  90/* interior & leaf nodes; props specific to leaves are marked L:
  91 * To reduce false sharing, place mostly read fields at beginning,
  92 * and mostly written ones at the end.
  93 */
  94struct htb_class {
  95	struct Qdisc_class_common common;
  96	struct psched_ratecfg	rate;
  97	struct psched_ratecfg	ceil;
  98	s64			buffer, cbuffer;/* token bucket depth/rate */
  99	s64			mbuffer;	/* max wait time */
 100	u32			prio;		/* these two are used only by leaves... */
 101	int			quantum;	/* but stored for parent-to-leaf return */
 102
 103	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
 104	struct tcf_block	*block;
 105	int			filter_cnt;
 106
 107	int			level;		/* our level (see above) */
 108	unsigned int		children;
 109	struct htb_class	*parent;	/* parent class */
 110
 111	struct net_rate_estimator __rcu *rate_est;
 112
 113	/*
 114	 * Written often fields
 115	 */
 116	struct gnet_stats_basic_packed bstats;
 117	struct tc_htb_xstats	xstats;	/* our special stats */
 118
 119	/* token bucket parameters */
 120	s64			tokens, ctokens;/* current number of tokens */
 121	s64			t_c;		/* checkpoint time */
 122
 123	union {
 124		struct htb_class_leaf {
 125			int		deficit[TC_HTB_MAXDEPTH];
 126			struct Qdisc	*q;
 127		} leaf;
 128		struct htb_class_inner {
 129			struct htb_prio clprio[TC_HTB_NUMPRIO];
 130		} inner;
 131	};
 132	s64			pq_key;
 133
 134	int			prio_activity;	/* for which prios are we active */
 135	enum htb_cmode		cmode;		/* current mode of the class */
 136	struct rb_node		pq_node;	/* node for event queue */
 137	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
 138
 139	unsigned int drops ____cacheline_aligned_in_smp;
 140	unsigned int		overlimits;
 141};
 142
 143struct htb_level {
 144	struct rb_root	wait_pq;
 145	struct htb_prio hprio[TC_HTB_NUMPRIO];
 146};
 147
 148struct htb_sched {
 149	struct Qdisc_class_hash clhash;
 150	int			defcls;		/* class where unclassified flows go to */
 151	int			rate2quantum;	/* quant = rate / rate2quantum */
 152
 153	/* filters for qdisc itself */
 154	struct tcf_proto __rcu	*filter_list;
 155	struct tcf_block	*block;
 156
 157#define HTB_WARN_TOOMANYEVENTS	0x1
 158	unsigned int		warned;	/* only one warning */
 159	int			direct_qlen;
 160	struct work_struct	work;
 161
 162	/* non shaped skbs; let them go directly thru */
 163	struct qdisc_skb_head	direct_queue;
 164	u32			direct_pkts;
 165	u32			overlimits;
 166
 167	struct qdisc_watchdog	watchdog;
 168
 169	s64			now;	/* cached dequeue time */
 170
 171	/* time of nearest event per level (row) */
 172	s64			near_ev_cache[TC_HTB_MAXDEPTH];
 173
 174	int			row_mask[TC_HTB_MAXDEPTH];
 175
 176	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
 177};
 178
 179/* find class in global hash table using given handle */
 180static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 181{
 182	struct htb_sched *q = qdisc_priv(sch);
 183	struct Qdisc_class_common *clc;
 184
 185	clc = qdisc_class_find(&q->clhash, handle);
 186	if (clc == NULL)
 187		return NULL;
 188	return container_of(clc, struct htb_class, common);
 189}
 190
 191static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 192{
 193	return (unsigned long)htb_find(handle, sch);
 194}
 195/**
 196 * htb_classify - classify a packet into class
 197 *
 198 * It returns NULL if the packet should be dropped or -1 if the packet
  199 * should be passed directly thru. In all other cases a leaf class is returned.
  200 * We allow direct class selection by classid in skb->priority. Then we examine
  201 * filters in the qdisc and in inner nodes (if a higher filter points to the inner
  202 * node). If we end up with classid MAJOR:0 we enqueue the skb into the special
  203 * internal fifo (direct). These packets then go directly thru. If we still
  204 * have no valid leaf we try to use the MAJOR:default leaf. If still unsuccessful
  205 * we finish and return the direct queue.
 206 */
 207#define HTB_DIRECT ((struct htb_class *)-1L)
 208
 209static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 210				      int *qerr)
 211{
 212	struct htb_sched *q = qdisc_priv(sch);
 213	struct htb_class *cl;
 214	struct tcf_result res;
 215	struct tcf_proto *tcf;
 216	int result;
 217
 218	/* allow to select class by setting skb->priority to valid classid;
 219	 * note that nfmark can be used too by attaching filter fw with no
 220	 * rules in it
 221	 */
 222	if (skb->priority == sch->handle)
 223		return HTB_DIRECT;	/* X:0 (direct flow) selected */
 224	cl = htb_find(skb->priority, sch);
 225	if (cl) {
 226		if (cl->level == 0)
 227			return cl;
 228		/* Start with inner filter chain if a non-leaf class is selected */
 229		tcf = rcu_dereference_bh(cl->filter_list);
 230	} else {
 231		tcf = rcu_dereference_bh(q->filter_list);
 232	}
 233
 234	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 235	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
 236#ifdef CONFIG_NET_CLS_ACT
 237		switch (result) {
 238		case TC_ACT_QUEUED:
 239		case TC_ACT_STOLEN:
 240		case TC_ACT_TRAP:
 241			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 242			/* fall through */
 243		case TC_ACT_SHOT:
 244			return NULL;
 245		}
 246#endif
 247		cl = (void *)res.class;
 248		if (!cl) {
 249			if (res.classid == sch->handle)
 250				return HTB_DIRECT;	/* X:0 (direct flow) */
 251			cl = htb_find(res.classid, sch);
 252			if (!cl)
 253				break;	/* filter selected invalid classid */
 254		}
 255		if (!cl->level)
 256			return cl;	/* we hit leaf; return it */
 257
 258		/* we have got inner class; apply inner filter chain */
 259		tcf = rcu_dereference_bh(cl->filter_list);
 260	}
 261	/* classification failed; try to use default class */
 262	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 263	if (!cl || cl->level)
 264		return HTB_DIRECT;	/* bad default .. this is safe bet */
 265	return cl;
 266}
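/*
 * Editorial sketch, not part of the original source, of the lookup order in
 * htb_classify() above, assuming a qdisc handle of 1:0 and defcls == 0x20
 * (the classids are illustrative only):
 *
 *	skb->priority == 1:0             -> HTB_DIRECT (bypass shaping)
 *	skb->priority == 1:10, a leaf    -> class 1:10, no filters consulted
 *	filters map the skb to inner 1:1 -> continue with 1:1's own filter chain
 *	no filter match at all           -> fall back to 1:20 (MAJOR:defcls)
 *	1:20 missing or not a leaf       -> HTB_DIRECT
 */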
 267
 268/**
 269 * htb_add_to_id_tree - adds class to the round robin list
 270 *
 271 * Routine adds class to the list (actually tree) sorted by classid.
 272 * Make sure that class is not already on such list for given prio.
 273 */
 274static void htb_add_to_id_tree(struct rb_root *root,
 275			       struct htb_class *cl, int prio)
 276{
 277	struct rb_node **p = &root->rb_node, *parent = NULL;
 278
 279	while (*p) {
 280		struct htb_class *c;
 281		parent = *p;
 282		c = rb_entry(parent, struct htb_class, node[prio]);
 283
 284		if (cl->common.classid > c->common.classid)
 285			p = &parent->rb_right;
 286		else
 287			p = &parent->rb_left;
 288	}
 289	rb_link_node(&cl->node[prio], parent, p);
 290	rb_insert_color(&cl->node[prio], root);
 291}
 292
 293/**
 294 * htb_add_to_wait_tree - adds class to the event queue with delay
 295 *
  296 * The class is added to the priority event queue to indicate that the class
  297 * will change its mode when the absolute time cl->pq_key (in nanoseconds)
  298 * is reached. Make sure that the class is not already in the queue.
 299 */
 300static void htb_add_to_wait_tree(struct htb_sched *q,
 301				 struct htb_class *cl, s64 delay)
 302{
 303	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 304
 305	cl->pq_key = q->now + delay;
 306	if (cl->pq_key == q->now)
 307		cl->pq_key++;
 308
 309	/* update the nearest event cache */
 310	if (q->near_ev_cache[cl->level] > cl->pq_key)
 311		q->near_ev_cache[cl->level] = cl->pq_key;
 312
 313	while (*p) {
 314		struct htb_class *c;
 315		parent = *p;
 316		c = rb_entry(parent, struct htb_class, pq_node);
 317		if (cl->pq_key >= c->pq_key)
 318			p = &parent->rb_right;
 319		else
 320			p = &parent->rb_left;
 321	}
 322	rb_link_node(&cl->pq_node, parent, p);
 323	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 324}
 325
 326/**
 327 * htb_next_rb_node - finds next node in binary tree
 328 *
 329 * When we are past last key we return NULL.
 330 * Average complexity is 2 steps per call.
 331 */
 332static inline void htb_next_rb_node(struct rb_node **n)
 333{
 334	*n = rb_next(*n);
 335}
 336
 337/**
 338 * htb_add_class_to_row - add class to its row
 339 *
 340 * The class is added to row at priorities marked in mask.
 341 * It does nothing if mask == 0.
 342 */
 343static inline void htb_add_class_to_row(struct htb_sched *q,
 344					struct htb_class *cl, int mask)
 345{
 346	q->row_mask[cl->level] |= mask;
 347	while (mask) {
 348		int prio = ffz(~mask);
 349		mask &= ~(1 << prio);
 350		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 351	}
 352}
 353
 354/* If this triggers, it is a bug in this code, but it need not be fatal */
 355static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 356{
 357	if (RB_EMPTY_NODE(rb)) {
 358		WARN_ON(1);
 359	} else {
 360		rb_erase(rb, root);
 361		RB_CLEAR_NODE(rb);
 362	}
 363}
 364
 365
 366/**
 367 * htb_remove_class_from_row - removes class from its row
 368 *
 369 * The class is removed from row at priorities marked in mask.
 370 * It does nothing if mask == 0.
 371 */
 372static inline void htb_remove_class_from_row(struct htb_sched *q,
 373						 struct htb_class *cl, int mask)
 374{
 375	int m = 0;
 376	struct htb_level *hlevel = &q->hlevel[cl->level];
 377
 378	while (mask) {
 379		int prio = ffz(~mask);
 380		struct htb_prio *hprio = &hlevel->hprio[prio];
 381
 382		mask &= ~(1 << prio);
 383		if (hprio->ptr == cl->node + prio)
 384			htb_next_rb_node(&hprio->ptr);
 385
 386		htb_safe_rb_erase(cl->node + prio, &hprio->row);
 387		if (!hprio->row.rb_node)
 388			m |= 1 << prio;
 389	}
 390	q->row_mask[cl->level] &= ~m;
 391}
 392
 393/**
  394 * htb_activate_prios - creates active class's feed chain
 395 *
 396 * The class is connected to ancestors and/or appropriate rows
 397 * for priorities it is participating on. cl->cmode must be new
 398 * (activated) mode. It does nothing if cl->prio_activity == 0.
 399 */
 400static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 401{
 402	struct htb_class *p = cl->parent;
 403	long m, mask = cl->prio_activity;
 404
 405	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 406		m = mask;
 407		while (m) {
 408			int prio = ffz(~m);
 409			m &= ~(1 << prio);
 410
 411			if (p->inner.clprio[prio].feed.rb_node)
 412				/* parent already has its feed in use so that
 413				 * reset bit in mask as parent is already ok
 414				 */
 415				mask &= ~(1 << prio);
 416
 417			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 418		}
 419		p->prio_activity |= mask;
 420		cl = p;
 421		p = cl->parent;
 422
 423	}
 424	if (cl->cmode == HTB_CAN_SEND && mask)
 425		htb_add_class_to_row(q, cl, mask);
 426}
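/*
 * Editorial note, not part of the original source: the loop above climbs the
 * hierarchy only while the class has to borrow (HTB_MAY_BORROW), hooking it
 * into each parent's per-priority feed tree; the first ancestor that can send
 * on its own (HTB_CAN_SEND) is instead inserted into the row for its level,
 * which is what the dequeue path later scans via htb_lookup_leaf().
 */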
 427
 428/**
 429 * htb_deactivate_prios - remove class from feed chain
 430 *
 431 * cl->cmode must represent old mode (before deactivation). It does
 432 * nothing if cl->prio_activity == 0. Class is removed from all feed
 433 * chains and rows.
 434 */
 435static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 436{
 437	struct htb_class *p = cl->parent;
 438	long m, mask = cl->prio_activity;
 439
 440	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 441		m = mask;
 442		mask = 0;
 443		while (m) {
 444			int prio = ffz(~m);
 445			m &= ~(1 << prio);
 446
 447			if (p->inner.clprio[prio].ptr == cl->node + prio) {
 448				/* we are removing child which is pointed to from
 449				 * parent feed - forget the pointer but remember
 450				 * classid
 451				 */
 452				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
 453				p->inner.clprio[prio].ptr = NULL;
 454			}
 455
 456			htb_safe_rb_erase(cl->node + prio,
 457					  &p->inner.clprio[prio].feed);
 458
 459			if (!p->inner.clprio[prio].feed.rb_node)
 460				mask |= 1 << prio;
 461		}
 462
 463		p->prio_activity &= ~mask;
 464		cl = p;
 465		p = cl->parent;
 466
 467	}
 468	if (cl->cmode == HTB_CAN_SEND && mask)
 469		htb_remove_class_from_row(q, cl, mask);
 470}
 471
 472static inline s64 htb_lowater(const struct htb_class *cl)
 473{
 474	if (htb_hysteresis)
 475		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 476	else
 477		return 0;
 478}
 479static inline s64 htb_hiwater(const struct htb_class *cl)
 480{
 481	if (htb_hysteresis)
 482		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 483	else
 484		return 0;
 485}
 486
 487
 488/**
 489 * htb_class_mode - computes and returns current class mode
 490 *
 491 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 492 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 493 * from now to time when cl will change its state.
  494 * Also it is worth noting that class mode doesn't change simply
  495 * at cl->{c,}tokens == 0 but rather within a hysteresis band in the
  496 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
  497 * mode transitions per time unit. The speed gain is about 1/6.
 498 */
 499static inline enum htb_cmode
 500htb_class_mode(struct htb_class *cl, s64 *diff)
 501{
 502	s64 toks;
 503
 504	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 505		*diff = -toks;
 506		return HTB_CANT_SEND;
 507	}
 508
 509	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 510		return HTB_CAN_SEND;
 511
 512	*diff = -toks;
 513	return HTB_MAY_BORROW;
 514}
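/*
 * Editorial example, not part of the original source, with made-up numbers:
 * suppose a class is currently HTB_CAN_SEND, htb_hysteresis is enabled,
 * cl->buffer corresponds to 20000 ns of tokens and, after adding the elapsed
 * time credit, tokens + diff == -5000 while ctokens + diff == +3000.  Without
 * hysteresis both thresholds are 0, so the class would flip to HTB_MAY_BORROW;
 * with hysteresis htb_hiwater() returns -buffer == -20000, so -5000 still
 * counts as HTB_CAN_SEND and the mode change (plus the feed-tree churn it
 * would imply) is deferred.
 */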
 515
 516/**
  517 * htb_change_class_mode - changes class's mode
 518 *
  519 * This should be the only way to change a class's mode under normal
  520 * circumstances. The routine will update feed list linkage, change mode
 521 * and add class to the wait event queue if appropriate. New mode should
 522 * be different from old one and cl->pq_key has to be valid if changing
 523 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 524 */
 525static void
 526htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 527{
 528	enum htb_cmode new_mode = htb_class_mode(cl, diff);
 529
 530	if (new_mode == cl->cmode)
 531		return;
 532
 533	if (new_mode == HTB_CANT_SEND) {
 534		cl->overlimits++;
 535		q->overlimits++;
 536	}
 537
 538	if (cl->prio_activity) {	/* not necessary: speed optimization */
 539		if (cl->cmode != HTB_CANT_SEND)
 540			htb_deactivate_prios(q, cl);
 541		cl->cmode = new_mode;
 542		if (new_mode != HTB_CANT_SEND)
 543			htb_activate_prios(q, cl);
 544	} else
 545		cl->cmode = new_mode;
 546}
 547
 548/**
 549 * htb_activate - inserts leaf cl into appropriate active feeds
 550 *
 551 * Routine learns (new) priority of leaf and activates feed chain
  552 * for the prio. It can be called on an already active leaf safely.
  553 *
 554 */
 555static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 556{
 557	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 558
 559	if (!cl->prio_activity) {
 560		cl->prio_activity = 1 << cl->prio;
 561		htb_activate_prios(q, cl);
 562	}
 563}
 564
 565/**
 566 * htb_deactivate - remove leaf cl from active feeds
 567 *
  568 * Make sure that the leaf is active. In other words, it can't be called
  569 * with a non-active leaf.
 570 */
 571static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 572{
 573	WARN_ON(!cl->prio_activity);
 574
 575	htb_deactivate_prios(q, cl);
 576	cl->prio_activity = 0;
 577}
 578
 579static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 580		       struct sk_buff **to_free)
 581{
 582	int uninitialized_var(ret);
 583	unsigned int len = qdisc_pkt_len(skb);
 584	struct htb_sched *q = qdisc_priv(sch);
 585	struct htb_class *cl = htb_classify(skb, sch, &ret);
 586
 587	if (cl == HTB_DIRECT) {
 588		/* enqueue to helper queue */
 589		if (q->direct_queue.qlen < q->direct_qlen) {
 590			__qdisc_enqueue_tail(skb, &q->direct_queue);
 591			q->direct_pkts++;
 592		} else {
 593			return qdisc_drop(skb, sch, to_free);
 594		}
 595#ifdef CONFIG_NET_CLS_ACT
 596	} else if (!cl) {
 597		if (ret & __NET_XMIT_BYPASS)
 598			qdisc_qstats_drop(sch);
 599		__qdisc_drop(skb, to_free);
 600		return ret;
 601#endif
 602	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 603					to_free)) != NET_XMIT_SUCCESS) {
 604		if (net_xmit_drop_count(ret)) {
 605			qdisc_qstats_drop(sch);
 606			cl->drops++;
 607		}
 608		return ret;
 609	} else {
 610		htb_activate(q, cl);
 611	}
 612
 613	sch->qstats.backlog += len;
 614	sch->q.qlen++;
 615	return NET_XMIT_SUCCESS;
 616}
 617
 618static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 619{
 620	s64 toks = diff + cl->tokens;
 621
 622	if (toks > cl->buffer)
 623		toks = cl->buffer;
 624	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 625	if (toks <= -cl->mbuffer)
 626		toks = 1 - cl->mbuffer;
 627
 628	cl->tokens = toks;
 629}
 630
 631static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 632{
 633	s64 toks = diff + cl->ctokens;
 634
 635	if (toks > cl->cbuffer)
 636		toks = cl->cbuffer;
 637	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 638	if (toks <= -cl->mbuffer)
 639		toks = 1 - cl->mbuffer;
 640
 641	cl->ctokens = toks;
 642}
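/*
 * Editorial example, not part of the original source: tokens are kept as
 * nanoseconds of transmission time.  psched_l2t_ns() converts a packet length
 * into the time needed to send it at the configured rate, so at 1 Mbit/s
 * (125000 bytes/s) a 1500 byte packet debits roughly 1500 / 125000 s = 12 ms
 * = 12,000,000 ns from the bucket, while the elapsed-time "diff" added above
 * refills it; the result is clamped to at most cl->buffer (or cl->cbuffer)
 * and to no less than 1 - cl->mbuffer.  The figures are illustrative.
 */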
 643
 644/**
 645 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 646 *
 647 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 648 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 649 * leaf and all ancestors and to rate bucket for ancestors at levels
 650 * "level" and higher. It also handles possible change of mode resulting
 651 * from the update. Note that mode can also increase here (MAY_BORROW to
  652 * CAN_SEND) because we can use a more precise clock than the event queue here.
  653 * In such a case we remove the class from the event queue first.
 654 */
 655static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 656			     int level, struct sk_buff *skb)
 657{
 658	int bytes = qdisc_pkt_len(skb);
 659	enum htb_cmode old_mode;
 660	s64 diff;
 661
 662	while (cl) {
 663		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 664		if (cl->level >= level) {
 665			if (cl->level == level)
 666				cl->xstats.lends++;
 667			htb_accnt_tokens(cl, bytes, diff);
 668		} else {
 669			cl->xstats.borrows++;
 670			cl->tokens += diff;	/* we moved t_c; update tokens */
 671		}
 672		htb_accnt_ctokens(cl, bytes, diff);
 673		cl->t_c = q->now;
 674
 675		old_mode = cl->cmode;
 676		diff = 0;
 677		htb_change_class_mode(q, cl, &diff);
 678		if (old_mode != cl->cmode) {
 679			if (old_mode != HTB_CAN_SEND)
 680				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 681			if (cl->cmode != HTB_CAN_SEND)
 682				htb_add_to_wait_tree(q, cl, diff);
 683		}
 684
 685		/* update basic stats except for leaves which are already updated */
 686		if (cl->level)
 687			bstats_update(&cl->bstats, skb);
 688
 689		cl = cl->parent;
 690	}
 691}
 692
 693/**
 694 * htb_do_events - make mode changes to classes at the level
 695 *
 696 * Scans event queue for pending events and applies them. Returns time of
 697 * next pending event (0 for no event in pq, q->now for too many events).
  698 * Note: only events with cl->pq_key <= q->now are applied.
 699 */
 700static s64 htb_do_events(struct htb_sched *q, const int level,
 701			 unsigned long start)
 702{
 703	/* don't run for longer than 2 jiffies; 2 is used instead of
 704	 * 1 to simplify things when jiffy is going to be incremented
 705	 * too soon
 706	 */
 707	unsigned long stop_at = start + 2;
 708	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 709
 710	while (time_before(jiffies, stop_at)) {
 711		struct htb_class *cl;
 712		s64 diff;
 713		struct rb_node *p = rb_first(wait_pq);
 714
 715		if (!p)
 716			return 0;
 717
 718		cl = rb_entry(p, struct htb_class, pq_node);
 719		if (cl->pq_key > q->now)
 720			return cl->pq_key;
 721
 722		htb_safe_rb_erase(p, wait_pq);
 723		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 724		htb_change_class_mode(q, cl, &diff);
 725		if (cl->cmode != HTB_CAN_SEND)
 726			htb_add_to_wait_tree(q, cl, diff);
 727	}
 728
 729	/* too much load - let's continue after a break for scheduling */
 730	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 731		pr_warn("htb: too many events!\n");
 732		q->warned |= HTB_WARN_TOOMANYEVENTS;
 733	}
 734
 735	return q->now;
 736}
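/*
 * Editorial note, not part of the original source: the 2 jiffy budget above
 * bounds how long a single dequeue may spend replaying wait-queue events.
 * When it is exceeded, the function returns q->now, which makes htb_dequeue()
 * defer the remaining work (via schedule_work()) instead of stalling the
 * transmit path, and the one-shot warning flags the overload.
 */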
 737
  738/* Returns class->node+prio from the id-tree where the class's id is >= id,
  739 * or NULL if no such one exists.
 740 */
 741static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 742					      u32 id)
 743{
 744	struct rb_node *r = NULL;
 745	while (n) {
 746		struct htb_class *cl =
 747		    rb_entry(n, struct htb_class, node[prio]);
 748
 749		if (id > cl->common.classid) {
 750			n = n->rb_right;
 751		} else if (id < cl->common.classid) {
 752			r = n;
 753			n = n->rb_left;
 754		} else {
 755			return n;
 756		}
 757	}
 758	return r;
 759}
 760
 761/**
 762 * htb_lookup_leaf - returns next leaf class in DRR order
 763 *
  764 * Find the leaf that the current feed pointer points to.
 765 */
 766static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 767{
 768	int i;
 769	struct {
 770		struct rb_node *root;
 771		struct rb_node **pptr;
 772		u32 *pid;
 773	} stk[TC_HTB_MAXDEPTH], *sp = stk;
 774
 775	BUG_ON(!hprio->row.rb_node);
 776	sp->root = hprio->row.rb_node;
 777	sp->pptr = &hprio->ptr;
 778	sp->pid = &hprio->last_ptr_id;
 779
 780	for (i = 0; i < 65535; i++) {
 781		if (!*sp->pptr && *sp->pid) {
 782			/* ptr was invalidated but id is valid - try to recover
 783			 * the original or next ptr
 784			 */
 785			*sp->pptr =
 786			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 787		}
  788		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
 789				 * can become out of date quickly
 790				 */
 791		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 792			*sp->pptr = sp->root;
 793			while ((*sp->pptr)->rb_left)
 794				*sp->pptr = (*sp->pptr)->rb_left;
 795			if (sp > stk) {
 796				sp--;
 797				if (!*sp->pptr) {
 798					WARN_ON(1);
 799					return NULL;
 800				}
 801				htb_next_rb_node(sp->pptr);
 802			}
 803		} else {
 804			struct htb_class *cl;
 805			struct htb_prio *clp;
 806
 807			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 808			if (!cl->level)
 809				return cl;
 810			clp = &cl->inner.clprio[prio];
 811			(++sp)->root = clp->feed.rb_node;
 812			sp->pptr = &clp->ptr;
 813			sp->pid = &clp->last_ptr_id;
 814		}
 815	}
 816	WARN_ON(1);
 817	return NULL;
 818}
 819
 820/* dequeues packet at given priority and level; call only if
 821 * you are sure that there is active class at prio/level
 822 */
 823static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 824					const int level)
 825{
 826	struct sk_buff *skb = NULL;
 827	struct htb_class *cl, *start;
 828	struct htb_level *hlevel = &q->hlevel[level];
 829	struct htb_prio *hprio = &hlevel->hprio[prio];
 830
 831	/* look initial class up in the row */
 832	start = cl = htb_lookup_leaf(hprio, prio);
 833
 834	do {
 835next:
 836		if (unlikely(!cl))
 837			return NULL;
 838
 839		/* class can be empty - it is unlikely but can be true if leaf
 840		 * qdisc drops packets in enqueue routine or if someone used
 841		 * graft operation on the leaf since last dequeue;
 842		 * simply deactivate and skip such class
 843		 */
 844		if (unlikely(cl->leaf.q->q.qlen == 0)) {
 845			struct htb_class *next;
 846			htb_deactivate(q, cl);
 847
 848			/* row/level might become empty */
 849			if ((q->row_mask[level] & (1 << prio)) == 0)
 850				return NULL;
 851
 852			next = htb_lookup_leaf(hprio, prio);
 853
 854			if (cl == start)	/* fix start if we just deleted it */
 855				start = next;
 856			cl = next;
 857			goto next;
 858		}
 859
 860		skb = cl->leaf.q->dequeue(cl->leaf.q);
 861		if (likely(skb != NULL))
 862			break;
 863
 864		qdisc_warn_nonwc("htb", cl->leaf.q);
 865		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 866					 &q->hlevel[0].hprio[prio].ptr);
 867		cl = htb_lookup_leaf(hprio, prio);
 868
 869	} while (cl != start);
 870
 871	if (likely(skb != NULL)) {
 872		bstats_update(&cl->bstats, skb);
 873		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
 874		if (cl->leaf.deficit[level] < 0) {
 875			cl->leaf.deficit[level] += cl->quantum;
 876			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 877						 &q->hlevel[0].hprio[prio].ptr);
 878		}
  879		/* this used to be after charge_class but this constellation
 880		 * gives us slightly better performance
 881		 */
 882		if (!cl->leaf.q->q.qlen)
 883			htb_deactivate(q, cl);
 884		htb_charge_class(q, cl, level, skb);
 885	}
 886	return skb;
 887}
 888
 889static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 890{
 891	struct sk_buff *skb;
 892	struct htb_sched *q = qdisc_priv(sch);
 893	int level;
 894	s64 next_event;
 895	unsigned long start_at;
 896
 897	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 898	skb = __qdisc_dequeue_head(&q->direct_queue);
 899	if (skb != NULL) {
 900ok:
 901		qdisc_bstats_update(sch, skb);
 902		qdisc_qstats_backlog_dec(sch, skb);
 903		sch->q.qlen--;
 904		return skb;
 905	}
 906
 907	if (!sch->q.qlen)
 908		goto fin;
 909	q->now = ktime_get_ns();
 910	start_at = jiffies;
 911
 912	next_event = q->now + 5LLU * NSEC_PER_SEC;
 913
 914	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 915		/* common case optimization - skip event handler quickly */
 916		int m;
 917		s64 event = q->near_ev_cache[level];
 918
 919		if (q->now >= event) {
 920			event = htb_do_events(q, level, start_at);
 921			if (!event)
 922				event = q->now + NSEC_PER_SEC;
 923			q->near_ev_cache[level] = event;
 924		}
 925
 926		if (next_event > event)
 927			next_event = event;
 928
 929		m = ~q->row_mask[level];
 930		while (m != (int)(-1)) {
 931			int prio = ffz(m);
 932
 933			m |= 1 << prio;
 934			skb = htb_dequeue_tree(q, prio, level);
 935			if (likely(skb != NULL))
 936				goto ok;
 937		}
 938	}
 939	if (likely(next_event > q->now))
 940		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 941	else
 942		schedule_work(&q->work);
 943fin:
 944	return skb;
 945}
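/*
 * Editorial note, not part of the original source: the dequeue order above is
 * the direct queue first, then levels from 0 (leaves) upward, and within a
 * level the lowest-numbered active priority wins (ffz() on the inverted row
 * mask), so prio 0 classes are served before prio 7 and classes that can send
 * without borrowing are preferred over those that must borrow from a higher
 * level.
 */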
 946
 947/* reset all classes */
  948/* always called under BH & queue lock */
 949static void htb_reset(struct Qdisc *sch)
 950{
 951	struct htb_sched *q = qdisc_priv(sch);
 952	struct htb_class *cl;
 953	unsigned int i;
 954
 955	for (i = 0; i < q->clhash.hashsize; i++) {
 956		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
 957			if (cl->level)
 958				memset(&cl->inner, 0, sizeof(cl->inner));
 959			else {
 960				if (cl->leaf.q)
 961					qdisc_reset(cl->leaf.q);
 962			}
 963			cl->prio_activity = 0;
 964			cl->cmode = HTB_CAN_SEND;
 965		}
 966	}
 967	qdisc_watchdog_cancel(&q->watchdog);
 968	__qdisc_reset_queue(&q->direct_queue);
 969	sch->q.qlen = 0;
 970	sch->qstats.backlog = 0;
 971	memset(q->hlevel, 0, sizeof(q->hlevel));
 972	memset(q->row_mask, 0, sizeof(q->row_mask));
 973}
 974
 975static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 976	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
 977	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
 978	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 979	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 980	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 981	[TCA_HTB_RATE64] = { .type = NLA_U64 },
 982	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
 983};
 984
 985static void htb_work_func(struct work_struct *work)
 986{
 987	struct htb_sched *q = container_of(work, struct htb_sched, work);
 988	struct Qdisc *sch = q->watchdog.qdisc;
 989
 990	rcu_read_lock();
 991	__netif_schedule(qdisc_root(sch));
 992	rcu_read_unlock();
 993}
 994
 995static int htb_init(struct Qdisc *sch, struct nlattr *opt,
 996		    struct netlink_ext_ack *extack)
 997{
 998	struct htb_sched *q = qdisc_priv(sch);
 999	struct nlattr *tb[TCA_HTB_MAX + 1];
1000	struct tc_htb_glob *gopt;
1001	int err;
1002
1003	qdisc_watchdog_init(&q->watchdog, sch);
1004	INIT_WORK(&q->work, htb_work_func);
1005
1006	if (!opt)
1007		return -EINVAL;
1008
1009	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1010	if (err)
1011		return err;
1012
1013	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1014					  NULL);
1015	if (err < 0)
1016		return err;
1017
1018	if (!tb[TCA_HTB_INIT])
1019		return -EINVAL;
1020
1021	gopt = nla_data(tb[TCA_HTB_INIT]);
1022	if (gopt->version != HTB_VER >> 16)
1023		return -EINVAL;
1024
1025	err = qdisc_class_hash_init(&q->clhash);
1026	if (err < 0)
1027		return err;
1028
1029	qdisc_skb_head_init(&q->direct_queue);
1030
1031	if (tb[TCA_HTB_DIRECT_QLEN])
1032		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1033	else
1034		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1035
1036	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1037		q->rate2quantum = 1;
1038	q->defcls = gopt->defcls;
1039
1040	return 0;
1041}
1042
1043static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1044{
1045	struct htb_sched *q = qdisc_priv(sch);
1046	struct nlattr *nest;
1047	struct tc_htb_glob gopt;
1048
1049	sch->qstats.overlimits = q->overlimits;
 1050	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
1051	 * no change can happen on the qdisc parameters.
1052	 */
1053
1054	gopt.direct_pkts = q->direct_pkts;
1055	gopt.version = HTB_VER;
1056	gopt.rate2quantum = q->rate2quantum;
1057	gopt.defcls = q->defcls;
1058	gopt.debug = 0;
1059
1060	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1061	if (nest == NULL)
1062		goto nla_put_failure;
1063	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1064	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1065		goto nla_put_failure;
1066
1067	return nla_nest_end(skb, nest);
1068
1069nla_put_failure:
1070	nla_nest_cancel(skb, nest);
1071	return -1;
1072}
1073
1074static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1075			  struct sk_buff *skb, struct tcmsg *tcm)
1076{
1077	struct htb_class *cl = (struct htb_class *)arg;
1078	struct nlattr *nest;
1079	struct tc_htb_opt opt;
1080
 1081	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
1082	 * no change can happen on the class parameters.
1083	 */
1084	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1085	tcm->tcm_handle = cl->common.classid;
1086	if (!cl->level && cl->leaf.q)
1087		tcm->tcm_info = cl->leaf.q->handle;
1088
1089	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1090	if (nest == NULL)
1091		goto nla_put_failure;
1092
1093	memset(&opt, 0, sizeof(opt));
1094
1095	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1096	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1097	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1098	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1099	opt.quantum = cl->quantum;
1100	opt.prio = cl->prio;
1101	opt.level = cl->level;
1102	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1103		goto nla_put_failure;
1104	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1105	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1106			      TCA_HTB_PAD))
1107		goto nla_put_failure;
1108	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1109	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1110			      TCA_HTB_PAD))
1111		goto nla_put_failure;
1112
1113	return nla_nest_end(skb, nest);
1114
1115nla_put_failure:
1116	nla_nest_cancel(skb, nest);
1117	return -1;
1118}
1119
1120static int
1121htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1122{
1123	struct htb_class *cl = (struct htb_class *)arg;
1124	struct gnet_stats_queue qs = {
1125		.drops = cl->drops,
1126		.overlimits = cl->overlimits,
1127	};
1128	__u32 qlen = 0;
1129
1130	if (!cl->level && cl->leaf.q)
1131		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1132
1133	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1134				    INT_MIN, INT_MAX);
1135	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1136				     INT_MIN, INT_MAX);
1137
1138	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1139				  d, NULL, &cl->bstats) < 0 ||
1140	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1141	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1142		return -1;
1143
1144	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1145}
1146
1147static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1148		     struct Qdisc **old, struct netlink_ext_ack *extack)
1149{
1150	struct htb_class *cl = (struct htb_class *)arg;
1151
1152	if (cl->level)
1153		return -EINVAL;
1154	if (new == NULL &&
1155	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1156				     cl->common.classid, extack)) == NULL)
1157		return -ENOBUFS;
1158
1159	*old = qdisc_replace(sch, new, &cl->leaf.q);
1160	return 0;
1161}
1162
1163static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1164{
1165	struct htb_class *cl = (struct htb_class *)arg;
1166	return !cl->level ? cl->leaf.q : NULL;
1167}
1168
1169static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1170{
1171	struct htb_class *cl = (struct htb_class *)arg;
1172
1173	htb_deactivate(qdisc_priv(sch), cl);
1174}
1175
1176static inline int htb_parent_last_child(struct htb_class *cl)
1177{
1178	if (!cl->parent)
1179		/* the root class */
1180		return 0;
1181	if (cl->parent->children > 1)
1182		/* not the last child */
1183		return 0;
1184	return 1;
1185}
1186
1187static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1188			       struct Qdisc *new_q)
1189{
1190	struct htb_class *parent = cl->parent;
1191
1192	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1193
1194	if (parent->cmode != HTB_CAN_SEND)
1195		htb_safe_rb_erase(&parent->pq_node,
1196				  &q->hlevel[parent->level].wait_pq);
1197
1198	parent->level = 0;
1199	memset(&parent->inner, 0, sizeof(parent->inner));
1200	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1201	parent->tokens = parent->buffer;
1202	parent->ctokens = parent->cbuffer;
1203	parent->t_c = ktime_get_ns();
1204	parent->cmode = HTB_CAN_SEND;
1205}
1206
1207static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1208{
1209	if (!cl->level) {
1210		WARN_ON(!cl->leaf.q);
1211		qdisc_put(cl->leaf.q);
1212	}
1213	gen_kill_estimator(&cl->rate_est);
1214	tcf_block_put(cl->block);
1215	kfree(cl);
1216}
1217
1218static void htb_destroy(struct Qdisc *sch)
1219{
1220	struct htb_sched *q = qdisc_priv(sch);
1221	struct hlist_node *next;
1222	struct htb_class *cl;
1223	unsigned int i;
1224
1225	cancel_work_sync(&q->work);
1226	qdisc_watchdog_cancel(&q->watchdog);
1227	/* This line used to be after htb_destroy_class call below
1228	 * and surprisingly it worked in 2.4. But it must precede it
 1229	 * because filters need their target class alive to be able to call
1230	 * unbind_filter on it (without Oops).
1231	 */
1232	tcf_block_put(q->block);
1233
1234	for (i = 0; i < q->clhash.hashsize; i++) {
1235		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1236			tcf_block_put(cl->block);
1237			cl->block = NULL;
1238		}
1239	}
1240	for (i = 0; i < q->clhash.hashsize; i++) {
1241		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1242					  common.hnode)
1243			htb_destroy_class(sch, cl);
1244	}
1245	qdisc_class_hash_destroy(&q->clhash);
1246	__qdisc_reset_queue(&q->direct_queue);
1247}
1248
1249static int htb_delete(struct Qdisc *sch, unsigned long arg)
1250{
1251	struct htb_sched *q = qdisc_priv(sch);
1252	struct htb_class *cl = (struct htb_class *)arg;
1253	struct Qdisc *new_q = NULL;
1254	int last_child = 0;
1255
 1256	/* TODO: why don't we allow deleting a subtree? references? does the
 1257	 * tc subsys guarantee us that in htb_destroy it holds no class
 1258	 * refs so that we can remove children safely there?
1259	 */
1260	if (cl->children || cl->filter_cnt)
1261		return -EBUSY;
1262
1263	if (!cl->level && htb_parent_last_child(cl)) {
1264		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1265					  cl->parent->common.classid,
1266					  NULL);
1267		last_child = 1;
1268	}
1269
1270	sch_tree_lock(sch);
1271
1272	if (!cl->level)
1273		qdisc_purge_queue(cl->leaf.q);
1274
1275	/* delete from hash and active; remainder in destroy_class */
1276	qdisc_class_hash_remove(&q->clhash, &cl->common);
1277	if (cl->parent)
1278		cl->parent->children--;
1279
1280	if (cl->prio_activity)
1281		htb_deactivate(q, cl);
1282
1283	if (cl->cmode != HTB_CAN_SEND)
1284		htb_safe_rb_erase(&cl->pq_node,
1285				  &q->hlevel[cl->level].wait_pq);
1286
1287	if (last_child)
1288		htb_parent_to_leaf(q, cl, new_q);
1289
1290	sch_tree_unlock(sch);
1291
1292	htb_destroy_class(sch, cl);
1293	return 0;
1294}
1295
1296static int htb_change_class(struct Qdisc *sch, u32 classid,
1297			    u32 parentid, struct nlattr **tca,
1298			    unsigned long *arg, struct netlink_ext_ack *extack)
1299{
1300	int err = -EINVAL;
1301	struct htb_sched *q = qdisc_priv(sch);
1302	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303	struct nlattr *opt = tca[TCA_OPTIONS];
1304	struct nlattr *tb[TCA_HTB_MAX + 1];
1305	struct Qdisc *parent_qdisc = NULL;
1306	struct tc_htb_opt *hopt;
1307	u64 rate64, ceil64;
1308	int warn = 0;
1309
1310	/* extract all subattrs from opt attr */
1311	if (!opt)
1312		goto failure;
1313
1314	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1315					  NULL);
1316	if (err < 0)
1317		goto failure;
1318
1319	err = -EINVAL;
1320	if (tb[TCA_HTB_PARMS] == NULL)
1321		goto failure;
1322
1323	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1324
1325	hopt = nla_data(tb[TCA_HTB_PARMS]);
1326	if (!hopt->rate.rate || !hopt->ceil.rate)
1327		goto failure;
1328
 1329	/* Keeping backward compatibility with rate_table based iproute2 tc */
1330	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1331		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1332					      NULL));
1333
1334	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1335		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1336					      NULL));
1337
1338	if (!cl) {		/* new class */
1339		struct Qdisc *new_q;
1340		int prio;
1341		struct {
1342			struct nlattr		nla;
1343			struct gnet_estimator	opt;
1344		} est = {
1345			.nla = {
1346				.nla_len	= nla_attr_size(sizeof(est.opt)),
1347				.nla_type	= TCA_RATE,
1348			},
1349			.opt = {
1350				/* 4s interval, 16s averaging constant */
1351				.interval	= 2,
1352				.ewma_log	= 2,
1353			},
1354		};
1355
1356		/* check for valid classid */
1357		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1358		    htb_find(classid, sch))
1359			goto failure;
1360
1361		/* check maximal depth */
1362		if (parent && parent->parent && parent->parent->level < 2) {
1363			pr_err("htb: tree is too deep\n");
1364			goto failure;
1365		}
1366		err = -ENOBUFS;
1367		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1368		if (!cl)
1369			goto failure;
1370
1371		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1372		if (err) {
1373			kfree(cl);
1374			goto failure;
1375		}
1376		if (htb_rate_est || tca[TCA_RATE]) {
1377			err = gen_new_estimator(&cl->bstats, NULL,
1378						&cl->rate_est,
1379						NULL,
1380						qdisc_root_sleeping_running(sch),
1381						tca[TCA_RATE] ? : &est.nla);
1382			if (err) {
1383				tcf_block_put(cl->block);
1384				kfree(cl);
1385				goto failure;
1386			}
1387		}
1388
1389		cl->children = 0;
1390		RB_CLEAR_NODE(&cl->pq_node);
1391
1392		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1393			RB_CLEAR_NODE(&cl->node[prio]);
1394
1395		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
 1396		 * so it can't be used inside of sch_tree_lock
1397		 * -- thanks to Karlis Peisenieks
1398		 */
1399		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1400					  classid, NULL);
1401		sch_tree_lock(sch);
1402		if (parent && !parent->level) {
1403			/* turn parent into inner node */
1404			qdisc_purge_queue(parent->leaf.q);
1405			parent_qdisc = parent->leaf.q;
1406			if (parent->prio_activity)
1407				htb_deactivate(q, parent);
1408
1409			/* remove from evt list because of level change */
1410			if (parent->cmode != HTB_CAN_SEND) {
1411				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1412				parent->cmode = HTB_CAN_SEND;
1413			}
1414			parent->level = (parent->parent ? parent->parent->level
1415					 : TC_HTB_MAXDEPTH) - 1;
1416			memset(&parent->inner, 0, sizeof(parent->inner));
1417		}
1418		/* leaf (we) needs elementary qdisc */
1419		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1420
1421		cl->common.classid = classid;
1422		cl->parent = parent;
1423
1424		/* set class to be in HTB_CAN_SEND state */
1425		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1426		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1427		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1428		cl->t_c = ktime_get_ns();
1429		cl->cmode = HTB_CAN_SEND;
1430
1431		/* attach to the hash list and parent's family */
1432		qdisc_class_hash_insert(&q->clhash, &cl->common);
1433		if (parent)
1434			parent->children++;
1435		if (cl->leaf.q != &noop_qdisc)
1436			qdisc_hash_add(cl->leaf.q, true);
1437	} else {
1438		if (tca[TCA_RATE]) {
1439			err = gen_replace_estimator(&cl->bstats, NULL,
1440						    &cl->rate_est,
1441						    NULL,
1442						    qdisc_root_sleeping_running(sch),
1443						    tca[TCA_RATE]);
1444			if (err)
1445				return err;
1446		}
1447		sch_tree_lock(sch);
1448	}
1449
1450	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1451
1452	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1453
1454	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1455	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1456
 1457	/* there used to be a nasty bug here: we have to check that the node
 1458	 * is really a leaf before changing cl->leaf !
1459	 */
1460	if (!cl->level) {
1461		u64 quantum = cl->rate.rate_bytes_ps;
1462
1463		do_div(quantum, q->rate2quantum);
1464		cl->quantum = min_t(u64, quantum, INT_MAX);
1465
1466		if (!hopt->quantum && cl->quantum < 1000) {
1467			warn = -1;
1468			cl->quantum = 1000;
1469		}
1470		if (!hopt->quantum && cl->quantum > 200000) {
1471			warn = 1;
1472			cl->quantum = 200000;
1473		}
1474		if (hopt->quantum)
1475			cl->quantum = hopt->quantum;
1476		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1477			cl->prio = TC_HTB_NUMPRIO - 1;
1478	}
1479
1480	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
1481	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
1482
1483	sch_tree_unlock(sch);
1484	qdisc_put(parent_qdisc);
1485
1486	if (warn)
1487		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
1488			    cl->common.classid, (warn == -1 ? "small" : "big"));
1489
1490	qdisc_class_hash_grow(sch, &q->clhash);
1491
1492	*arg = (unsigned long)cl;
1493	return 0;
1494
1495failure:
1496	return err;
1497}
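/*
 * Editorial example, not part of the original source: with r2q (rate2quantum)
 * left at iproute2's usual default of 10 and no explicit quantum, a class with
 * rate 8 Mbit/s (1,000,000 bytes/s) gets quantum = 1000000 / 10 = 100000
 * bytes, inside the accepted 1000..200000 window; a 100 kbit/s class would
 * compute 1250 bytes, and anything outside the window is clamped with the
 * "Consider r2q change" warning above.  The rates are illustrative only.
 */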
1498
1499static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
1500				       struct netlink_ext_ack *extack)
1501{
1502	struct htb_sched *q = qdisc_priv(sch);
1503	struct htb_class *cl = (struct htb_class *)arg;
1504
1505	return cl ? cl->block : q->block;
1506}
1507
1508static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1509				     u32 classid)
1510{
1511	struct htb_class *cl = htb_find(classid, sch);
1512
1513	/*if (cl && !cl->level) return 0;
1514	 * The line above used to be there to prevent attaching filters to
1515	 * leaves. But at least tc_index filter uses this just to get class
1516	 * for other reasons so that we have to allow for it.
1517	 * ----
1518	 * 19.6.2002 As Werner explained it is ok - bind filter is just
1519	 * another way to "lock" the class - unlike "get" this lock can
1520	 * be broken by class during destroy IIUC.
1521	 */
1522	if (cl)
1523		cl->filter_cnt++;
1524	return (unsigned long)cl;
1525}
1526
1527static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1528{
1529	struct htb_class *cl = (struct htb_class *)arg;
1530
1531	if (cl)
1532		cl->filter_cnt--;
1533}
1534
1535static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1536{
1537	struct htb_sched *q = qdisc_priv(sch);
1538	struct htb_class *cl;
1539	unsigned int i;
1540
1541	if (arg->stop)
1542		return;
1543
1544	for (i = 0; i < q->clhash.hashsize; i++) {
1545		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1546			if (arg->count < arg->skip) {
1547				arg->count++;
1548				continue;
1549			}
1550			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1551				arg->stop = 1;
1552				return;
1553			}
1554			arg->count++;
1555		}
1556	}
1557}
1558
1559static const struct Qdisc_class_ops htb_class_ops = {
1560	.graft		=	htb_graft,
1561	.leaf		=	htb_leaf,
1562	.qlen_notify	=	htb_qlen_notify,
1563	.find		=	htb_search,
1564	.change		=	htb_change_class,
1565	.delete		=	htb_delete,
1566	.walk		=	htb_walk,
1567	.tcf_block	=	htb_tcf_block,
1568	.bind_tcf	=	htb_bind_filter,
1569	.unbind_tcf	=	htb_unbind_filter,
1570	.dump		=	htb_dump_class,
1571	.dump_stats	=	htb_dump_class_stats,
1572};
1573
1574static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1575	.cl_ops		=	&htb_class_ops,
1576	.id		=	"htb",
1577	.priv_size	=	sizeof(struct htb_sched),
1578	.enqueue	=	htb_enqueue,
1579	.dequeue	=	htb_dequeue,
1580	.peek		=	qdisc_peek_dequeued,
1581	.init		=	htb_init,
1582	.reset		=	htb_reset,
1583	.destroy	=	htb_destroy,
1584	.dump		=	htb_dump,
1585	.owner		=	THIS_MODULE,
1586};
1587
1588static int __init htb_module_init(void)
1589{
1590	return register_qdisc(&htb_qdisc_ops);
1591}
1592static void __exit htb_module_exit(void)
1593{
1594	unregister_qdisc(&htb_qdisc_ops);
1595}
1596
1597module_init(htb_module_init)
1598module_exit(htb_module_exit)
1599MODULE_LICENSE("GPL");
v6.2
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
   4 *
   5 * Authors:	Martin Devera, <devik@cdi.cz>
   6 *
   7 * Credits (in time order) for older HTB versions:
   8 *              Stef Coene <stef.coene@docum.org>
   9 *			HTB support at LARTC mailing list
  10 *		Ondrej Kraus, <krauso@barr.cz>
  11 *			found missing INIT_QDISC(htb)
  12 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
  13 *			helped a lot to locate nasty class stall bug
  14 *		Andi Kleen, Jamal Hadi, Bert Hubert
  15 *			code review and helpful comments on shaping
  16 *		Tomasz Wrona, <tw@eter.tym.pl>
  17 *			created test case so that I was able to fix nasty bug
  18 *		Wilfried Weissmann
  19 *			spotted bug in dequeue code and helped with fix
  20 *		Jiri Fojtasek
  21 *			fixed requeue routine
  22 *		and many others. thanks.
  23 */
  24#include <linux/module.h>
  25#include <linux/moduleparam.h>
  26#include <linux/types.h>
  27#include <linux/kernel.h>
  28#include <linux/string.h>
  29#include <linux/errno.h>
  30#include <linux/skbuff.h>
  31#include <linux/list.h>
  32#include <linux/compiler.h>
  33#include <linux/rbtree.h>
  34#include <linux/workqueue.h>
  35#include <linux/slab.h>
  36#include <net/netlink.h>
  37#include <net/sch_generic.h>
  38#include <net/pkt_sched.h>
  39#include <net/pkt_cls.h>
  40
  41/* HTB algorithm.
  42    Author: devik@cdi.cz
  43    ========================================================================
  44    HTB is like TBF with multiple classes. It is also similar to CBQ because
   45    it allows assigning a priority to each class in the hierarchy.
  46    In fact it is another implementation of Floyd's formal sharing.
  47
  48    Levels:
   49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
   50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
   51    one less than their parent.
  52*/
  53
  54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
  55#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
  56
  57#if HTB_VER >> 16 != TC_HTB_PROTOVER
  58#error "Mismatched sch_htb.c and pkt_sch.h"
  59#endif
  60
  61/* Module parameter and sysfs export */
  62module_param    (htb_hysteresis, int, 0640);
  63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
  64
  65static int htb_rate_est = 0; /* htb classes have a default rate estimator */
  66module_param(htb_rate_est, int, 0640);
  67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
  68
   69/* used internally to keep the status of a single class */
  70enum htb_cmode {
  71	HTB_CANT_SEND,		/* class can't send and can't borrow */
  72	HTB_MAY_BORROW,		/* class can't send but may borrow */
  73	HTB_CAN_SEND		/* class can send */
  74};
  75
  76struct htb_prio {
  77	union {
  78		struct rb_root	row;
  79		struct rb_root	feed;
  80	};
  81	struct rb_node	*ptr;
  82	/* When class changes from state 1->2 and disconnects from
  83	 * parent's feed then we lost ptr value and start from the
  84	 * first child again. Here we store classid of the
  85	 * last valid ptr (used when ptr is NULL).
  86	 */
  87	u32		last_ptr_id;
  88};
  89
  90/* interior & leaf nodes; props specific to leaves are marked L:
  91 * To reduce false sharing, place mostly read fields at beginning,
  92 * and mostly written ones at the end.
  93 */
  94struct htb_class {
  95	struct Qdisc_class_common common;
  96	struct psched_ratecfg	rate;
  97	struct psched_ratecfg	ceil;
  98	s64			buffer, cbuffer;/* token bucket depth/rate */
  99	s64			mbuffer;	/* max wait time */
 100	u32			prio;		/* these two are used only by leaves... */
 101	int			quantum;	/* but stored for parent-to-leaf return */
 102
 103	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
 104	struct tcf_block	*block;
 105	int			filter_cnt;
 106
 107	int			level;		/* our level (see above) */
 108	unsigned int		children;
 109	struct htb_class	*parent;	/* parent class */
 110
 111	struct net_rate_estimator __rcu *rate_est;
 112
 113	/*
 114	 * Written often fields
 115	 */
 116	struct gnet_stats_basic_sync bstats;
 117	struct gnet_stats_basic_sync bstats_bias;
 118	struct tc_htb_xstats	xstats;	/* our special stats */
 119
 120	/* token bucket parameters */
 121	s64			tokens, ctokens;/* current number of tokens */
 122	s64			t_c;		/* checkpoint time */
 123
 124	union {
 125		struct htb_class_leaf {
 126			int		deficit[TC_HTB_MAXDEPTH];
 127			struct Qdisc	*q;
 128			struct netdev_queue *offload_queue;
 129		} leaf;
 130		struct htb_class_inner {
 131			struct htb_prio clprio[TC_HTB_NUMPRIO];
 132		} inner;
 133	};
 134	s64			pq_key;
 135
 136	int			prio_activity;	/* for which prios are we active */
 137	enum htb_cmode		cmode;		/* current mode of the class */
 138	struct rb_node		pq_node;	/* node for event queue */
 139	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
 140
 141	unsigned int drops ____cacheline_aligned_in_smp;
 142	unsigned int		overlimits;
 143};
 144
 145struct htb_level {
 146	struct rb_root	wait_pq;
 147	struct htb_prio hprio[TC_HTB_NUMPRIO];
 148};
 149
 150struct htb_sched {
 151	struct Qdisc_class_hash clhash;
 152	int			defcls;		/* class where unclassified flows go to */
 153	int			rate2quantum;	/* quant = rate / rate2quantum */
 154
 155	/* filters for qdisc itself */
 156	struct tcf_proto __rcu	*filter_list;
 157	struct tcf_block	*block;
 158
 159#define HTB_WARN_TOOMANYEVENTS	0x1
 160	unsigned int		warned;	/* only one warning */
 161	int			direct_qlen;
 162	struct work_struct	work;
 163
 164	/* non shaped skbs; let them go directly thru */
 165	struct qdisc_skb_head	direct_queue;
 166	u32			direct_pkts;
 167	u32			overlimits;
 168
 169	struct qdisc_watchdog	watchdog;
 170
 171	s64			now;	/* cached dequeue time */
 172
 173	/* time of nearest event per level (row) */
 174	s64			near_ev_cache[TC_HTB_MAXDEPTH];
 175
 176	int			row_mask[TC_HTB_MAXDEPTH];
 177
 178	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
 179
 180	struct Qdisc		**direct_qdiscs;
 181	unsigned int            num_direct_qdiscs;
 182
 183	bool			offload;
 184};
 185
 186/* find class in global hash table using given handle */
 187static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
 188{
 189	struct htb_sched *q = qdisc_priv(sch);
 190	struct Qdisc_class_common *clc;
 191
 192	clc = qdisc_class_find(&q->clhash, handle);
 193	if (clc == NULL)
 194		return NULL;
 195	return container_of(clc, struct htb_class, common);
 196}
 197
 198static unsigned long htb_search(struct Qdisc *sch, u32 handle)
 199{
 200	return (unsigned long)htb_find(handle, sch);
 201}
 202
 203#define HTB_DIRECT ((struct htb_class *)-1L)
 204
 205/**
 206 * htb_classify - classify a packet into class
 207 * @skb: the socket buffer
 208 * @sch: the active queue discipline
 209 * @qerr: pointer for returned status code
 210 *
 211 * It returns NULL if the packet should be dropped or -1 if the packet
 212 * should be passed directly thru. In all other cases leaf class is returned.
  213 * We allow direct class selection by classid in priority. Then we examine
 214 * filters in qdisc and in inner nodes (if higher filter points to the inner
 215 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 216 * internal fifo (direct). These packets then go directly thru. If we still
  217 * have no valid leaf we try to use MAJOR:default leaf. If that is still
  218 * unsuccessful, we finish and return the direct queue.
 219 */
 220static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
 221				      int *qerr)
 222{
 223	struct htb_sched *q = qdisc_priv(sch);
 224	struct htb_class *cl;
 225	struct tcf_result res;
 226	struct tcf_proto *tcf;
 227	int result;
 228
  229	/* allow selecting the class by setting skb->priority to a valid classid;
 230	 * note that nfmark can be used too by attaching filter fw with no
 231	 * rules in it
 232	 */
 233	if (skb->priority == sch->handle)
 234		return HTB_DIRECT;	/* X:0 (direct flow) selected */
 235	cl = htb_find(skb->priority, sch);
 236	if (cl) {
 237		if (cl->level == 0)
 238			return cl;
 239		/* Start with inner filter chain if a non-leaf class is selected */
 240		tcf = rcu_dereference_bh(cl->filter_list);
 241	} else {
 242		tcf = rcu_dereference_bh(q->filter_list);
 243	}
 244
 245	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 246	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
 247#ifdef CONFIG_NET_CLS_ACT
 248		switch (result) {
 249		case TC_ACT_QUEUED:
 250		case TC_ACT_STOLEN:
 251		case TC_ACT_TRAP:
 252			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
 253			fallthrough;
 254		case TC_ACT_SHOT:
 255			return NULL;
 256		}
 257#endif
 258		cl = (void *)res.class;
 259		if (!cl) {
 260			if (res.classid == sch->handle)
 261				return HTB_DIRECT;	/* X:0 (direct flow) */
 262			cl = htb_find(res.classid, sch);
 263			if (!cl)
 264				break;	/* filter selected invalid classid */
 265		}
 266		if (!cl->level)
 267			return cl;	/* we hit leaf; return it */
 268
 269		/* we have got inner class; apply inner filter chain */
 270		tcf = rcu_dereference_bh(cl->filter_list);
 271	}
 272	/* classification failed; try to use default class */
 273	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
 274	if (!cl || cl->level)
 275		return HTB_DIRECT;	/* bad default .. this is safe bet */
 276	return cl;
 277}
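
/* Editor's note: an illustrative userspace view of the classification paths
 * above; the device, handle and classids are placeholders. A socket can
 * bypass the filters by writing a classid of this qdisc into skb->priority:
 *
 *	int prio = 0x00010010;	// classid 1:10 of qdisc handle 1:
 *	setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 *
 * Otherwise classification goes through the attached filters, e.g.:
 *
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 80 0xffff flowid 1:10
 *
 * and finally falls back to the MAJOR:default class or the direct queue.
 */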
 278
 279/**
 280 * htb_add_to_id_tree - adds class to the round robin list
 281 * @root: the root of the tree
 282 * @cl: the class to add
  283 * @prio: the given prio in class
 284 *
 285 * Routine adds class to the list (actually tree) sorted by classid.
 286 * Make sure that class is not already on such list for given prio.
 287 */
 288static void htb_add_to_id_tree(struct rb_root *root,
 289			       struct htb_class *cl, int prio)
 290{
 291	struct rb_node **p = &root->rb_node, *parent = NULL;
 292
 293	while (*p) {
 294		struct htb_class *c;
 295		parent = *p;
 296		c = rb_entry(parent, struct htb_class, node[prio]);
 297
 298		if (cl->common.classid > c->common.classid)
 299			p = &parent->rb_right;
 300		else
 301			p = &parent->rb_left;
 302	}
 303	rb_link_node(&cl->node[prio], parent, p);
 304	rb_insert_color(&cl->node[prio], root);
 305}
 306
 307/**
 308 * htb_add_to_wait_tree - adds class to the event queue with delay
 309 * @q: the priority event queue
 310 * @cl: the class to add
  311 * @delay: delay in nanoseconds
 312 *
  313 * The class is added to the priority event queue to indicate that the class
  314 * will change its mode at time cl->pq_key (q->now + delay). Make sure that
  315 * the class is not already in the queue.
 316 */
 317static void htb_add_to_wait_tree(struct htb_sched *q,
 318				 struct htb_class *cl, s64 delay)
 319{
 320	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
 321
 322	cl->pq_key = q->now + delay;
 323	if (cl->pq_key == q->now)
 324		cl->pq_key++;
 325
 326	/* update the nearest event cache */
 327	if (q->near_ev_cache[cl->level] > cl->pq_key)
 328		q->near_ev_cache[cl->level] = cl->pq_key;
 329
 330	while (*p) {
 331		struct htb_class *c;
 332		parent = *p;
 333		c = rb_entry(parent, struct htb_class, pq_node);
 334		if (cl->pq_key >= c->pq_key)
 335			p = &parent->rb_right;
 336		else
 337			p = &parent->rb_left;
 338	}
 339	rb_link_node(&cl->pq_node, parent, p);
 340	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 341}
 342
 343/**
 344 * htb_next_rb_node - finds next node in binary tree
 345 * @n: the current node in binary tree
 346 *
  347 * When we are past the last key, *n is set to NULL.
 348 * Average complexity is 2 steps per call.
 349 */
 350static inline void htb_next_rb_node(struct rb_node **n)
 351{
 352	*n = rb_next(*n);
 353}
 354
 355/**
 356 * htb_add_class_to_row - add class to its row
 357 * @q: the priority event queue
 358 * @cl: the class to add
 359 * @mask: the given priorities in class in bitmap
 360 *
 361 * The class is added to row at priorities marked in mask.
 362 * It does nothing if mask == 0.
 363 */
 364static inline void htb_add_class_to_row(struct htb_sched *q,
 365					struct htb_class *cl, int mask)
 366{
 367	q->row_mask[cl->level] |= mask;
 368	while (mask) {
 369		int prio = ffz(~mask);
 370		mask &= ~(1 << prio);
 371		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
 372	}
 373}
 374
 375/* If this triggers, it is a bug in this code, but it need not be fatal */
 376static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
 377{
 378	if (RB_EMPTY_NODE(rb)) {
 379		WARN_ON(1);
 380	} else {
 381		rb_erase(rb, root);
 382		RB_CLEAR_NODE(rb);
 383	}
 384}
 385
 386
 387/**
 388 * htb_remove_class_from_row - removes class from its row
 389 * @q: the priority event queue
  390 * @cl: the class to remove
 391 * @mask: the given priorities in class in bitmap
 392 *
 393 * The class is removed from row at priorities marked in mask.
 394 * It does nothing if mask == 0.
 395 */
 396static inline void htb_remove_class_from_row(struct htb_sched *q,
 397						 struct htb_class *cl, int mask)
 398{
 399	int m = 0;
 400	struct htb_level *hlevel = &q->hlevel[cl->level];
 401
 402	while (mask) {
 403		int prio = ffz(~mask);
 404		struct htb_prio *hprio = &hlevel->hprio[prio];
 405
 406		mask &= ~(1 << prio);
 407		if (hprio->ptr == cl->node + prio)
 408			htb_next_rb_node(&hprio->ptr);
 409
 410		htb_safe_rb_erase(cl->node + prio, &hprio->row);
 411		if (!hprio->row.rb_node)
 412			m |= 1 << prio;
 413	}
 414	q->row_mask[cl->level] &= ~m;
 415}
 416
 417/**
  418 * htb_activate_prios - creates active class's feed chain
 419 * @q: the priority event queue
 420 * @cl: the class to activate
 421 *
 422 * The class is connected to ancestors and/or appropriate rows
  423 * for the priorities it participates in. cl->cmode must be the new
  424 * (activated) mode. It does nothing if cl->prio_activity == 0.
 425 */
 426static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
 427{
 428	struct htb_class *p = cl->parent;
 429	long m, mask = cl->prio_activity;
 430
 431	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 432		m = mask;
 433		while (m) {
 434			unsigned int prio = ffz(~m);
 435
 436			if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
 437				break;
 438			m &= ~(1 << prio);
 439
 440			if (p->inner.clprio[prio].feed.rb_node)
  441				/* parent already has its feed in use, so
  442				 * reset the bit in mask as parent is already ok
 443				 */
 444				mask &= ~(1 << prio);
 445
 446			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
 447		}
 448		p->prio_activity |= mask;
 449		cl = p;
 450		p = cl->parent;
 451
 452	}
 453	if (cl->cmode == HTB_CAN_SEND && mask)
 454		htb_add_class_to_row(q, cl, mask);
 455}
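
/* Editor's note: a small example of the feed-chain walk above; the classids
 * are made up. If leaf 1:10 becomes active at prio 0 while in HTB_MAY_BORROW
 * mode, it is linked into its parent 1:1's prio-0 feed tree; the loop then
 * repeats for 1:1, and the first ancestor found in HTB_CAN_SEND mode is
 * inserted into the row of its level:
 *
 *	q->hlevel[level of 1:1].hprio[0].row  ->  1:1
 *	1:1->inner.clprio[0].feed             ->  1:10
 */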
 456
 457/**
 458 * htb_deactivate_prios - remove class from feed chain
 459 * @q: the priority event queue
 460 * @cl: the class to deactivate
 461 *
 462 * cl->cmode must represent old mode (before deactivation). It does
 463 * nothing if cl->prio_activity == 0. Class is removed from all feed
 464 * chains and rows.
 465 */
 466static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
 467{
 468	struct htb_class *p = cl->parent;
 469	long m, mask = cl->prio_activity;
 470
 471	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
 472		m = mask;
 473		mask = 0;
 474		while (m) {
 475			int prio = ffz(~m);
 476			m &= ~(1 << prio);
 477
 478			if (p->inner.clprio[prio].ptr == cl->node + prio) {
 479				/* we are removing child which is pointed to from
 480				 * parent feed - forget the pointer but remember
 481				 * classid
 482				 */
 483				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
 484				p->inner.clprio[prio].ptr = NULL;
 485			}
 486
 487			htb_safe_rb_erase(cl->node + prio,
 488					  &p->inner.clprio[prio].feed);
 489
 490			if (!p->inner.clprio[prio].feed.rb_node)
 491				mask |= 1 << prio;
 492		}
 493
 494		p->prio_activity &= ~mask;
 495		cl = p;
 496		p = cl->parent;
 497
 498	}
 499	if (cl->cmode == HTB_CAN_SEND && mask)
 500		htb_remove_class_from_row(q, cl, mask);
 501}
 502
 503static inline s64 htb_lowater(const struct htb_class *cl)
 504{
 505	if (htb_hysteresis)
 506		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
 507	else
 508		return 0;
 509}
 510static inline s64 htb_hiwater(const struct htb_class *cl)
 511{
 512	if (htb_hysteresis)
 513		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
 514	else
 515		return 0;
 516}
 517
 518
 519/**
 520 * htb_class_mode - computes and returns current class mode
 521 * @cl: the target class
  522 * @diff: diff time in nanoseconds
 523 *
 524 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 525 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 526 * from now to time when cl will change its state.
  527 * It is also worth noting that the class mode doesn't change simply
  528 * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
  529 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 530 * mode transitions per time unit. The speed gain is about 1/6.
 531 */
 532static inline enum htb_cmode
 533htb_class_mode(struct htb_class *cl, s64 *diff)
 534{
 535	s64 toks;
 536
 537	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
 538		*diff = -toks;
 539		return HTB_CANT_SEND;
 540	}
 541
 542	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
 543		return HTB_CAN_SEND;
 544
 545	*diff = -toks;
 546	return HTB_MAY_BORROW;
 547}
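
/* Editor's note: a worked summary of the decision above, assuming
 * htb_hysteresis == 0 so that both watermarks are 0:
 *
 *	ctokens + diff <  0                        ->  HTB_CANT_SEND
 *	ctokens + diff >= 0 && tokens + diff >= 0  ->  HTB_CAN_SEND
 *	ctokens + diff >= 0 && tokens + diff <  0  ->  HTB_MAY_BORROW
 *
 * With hysteresis enabled, a class that may already send keeps its mode
 * until it is a full burst (-cl->buffer or -cl->cbuffer) in debt, trading
 * some accuracy for fewer mode transitions.
 */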
 548
 549/**
  550 * htb_change_class_mode - changes class's mode
 551 * @q: the priority event queue
 552 * @cl: the target class
  553 * @diff: diff time in nanoseconds
 554 *
  555 * This should be the only way to change a class's mode under normal
 556 * circumstances. Routine will update feed lists linkage, change mode
 557 * and add class to the wait event queue if appropriate. New mode should
 558 * be different from old one and cl->pq_key has to be valid if changing
 559 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 560 */
 561static void
 562htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
 563{
 564	enum htb_cmode new_mode = htb_class_mode(cl, diff);
 565
 566	if (new_mode == cl->cmode)
 567		return;
 568
 569	if (new_mode == HTB_CANT_SEND) {
 570		cl->overlimits++;
 571		q->overlimits++;
 572	}
 573
 574	if (cl->prio_activity) {	/* not necessary: speed optimization */
 575		if (cl->cmode != HTB_CANT_SEND)
 576			htb_deactivate_prios(q, cl);
 577		cl->cmode = new_mode;
 578		if (new_mode != HTB_CANT_SEND)
 579			htb_activate_prios(q, cl);
 580	} else
 581		cl->cmode = new_mode;
 582}
 583
 584/**
 585 * htb_activate - inserts leaf cl into appropriate active feeds
 586 * @q: the priority event queue
 587 * @cl: the target class
 588 *
  589 * Routine learns the (new) priority of the leaf and activates the feed
  590 * chain for that prio. It can safely be called on an already active
  591 * leaf.
 592 */
 593static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
 594{
 595	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
 596
 597	if (!cl->prio_activity) {
 598		cl->prio_activity = 1 << cl->prio;
 599		htb_activate_prios(q, cl);
 600	}
 601}
 602
 603/**
 604 * htb_deactivate - remove leaf cl from active feeds
 605 * @q: the priority event queue
 606 * @cl: the target class
 607 *
  608 * Make sure that the leaf is active. In other words, it can't be called
  609 * with a non-active leaf.
 610 */
 611static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
 612{
 613	WARN_ON(!cl->prio_activity);
 614
 615	htb_deactivate_prios(q, cl);
 616	cl->prio_activity = 0;
 617}
 618
 619static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 620		       struct sk_buff **to_free)
 621{
 622	int ret;
 623	unsigned int len = qdisc_pkt_len(skb);
 624	struct htb_sched *q = qdisc_priv(sch);
 625	struct htb_class *cl = htb_classify(skb, sch, &ret);
 626
 627	if (cl == HTB_DIRECT) {
 628		/* enqueue to helper queue */
 629		if (q->direct_queue.qlen < q->direct_qlen) {
 630			__qdisc_enqueue_tail(skb, &q->direct_queue);
 631			q->direct_pkts++;
 632		} else {
 633			return qdisc_drop(skb, sch, to_free);
 634		}
 635#ifdef CONFIG_NET_CLS_ACT
 636	} else if (!cl) {
 637		if (ret & __NET_XMIT_BYPASS)
 638			qdisc_qstats_drop(sch);
 639		__qdisc_drop(skb, to_free);
 640		return ret;
 641#endif
 642	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
 643					to_free)) != NET_XMIT_SUCCESS) {
 644		if (net_xmit_drop_count(ret)) {
 645			qdisc_qstats_drop(sch);
 646			cl->drops++;
 647		}
 648		return ret;
 649	} else {
 650		htb_activate(q, cl);
 651	}
 652
 653	sch->qstats.backlog += len;
 654	sch->q.qlen++;
 655	return NET_XMIT_SUCCESS;
 656}
 657
 658static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
 659{
 660	s64 toks = diff + cl->tokens;
 661
 662	if (toks > cl->buffer)
 663		toks = cl->buffer;
 664	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
 665	if (toks <= -cl->mbuffer)
 666		toks = 1 - cl->mbuffer;
 667
 668	cl->tokens = toks;
 669}
 670
 671static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
 672{
 673	s64 toks = diff + cl->ctokens;
 674
 675	if (toks > cl->cbuffer)
 676		toks = cl->cbuffer;
 677	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
 678	if (toks <= -cl->mbuffer)
 679		toks = 1 - cl->mbuffer;
 680
 681	cl->ctokens = toks;
 682}
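
/* Editor's note: both accounting helpers above perform the same leaky-bucket
 * step, sketched here in plain C (all token values are nanoseconds of
 * transmission time):
 *
 *	toks = min(toks + elapsed, burst);	// refill, capped at the burst
 *	toks -= time_to_send(rate, bytes);	// charge this packet
 *	if (toks <= -mbuffer)			// bound the accumulated debt
 *		toks = 1 - mbuffer;
 *
 * htb_accnt_tokens() uses cl->rate/cl->buffer, htb_accnt_ctokens() uses
 * cl->ceil/cl->cbuffer.
 */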
 683
 684/**
 685 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 686 * @q: the priority event queue
  687 * @cl: the class to start iterating from
 688 * @level: the minimum level to account
 689 * @skb: the socket buffer
 690 *
 691 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 692 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 693 * leaf and all ancestors and to rate bucket for ancestors at levels
 694 * "level" and higher. It also handles possible change of mode resulting
  695 * "level" and higher. It also handles a possible change of mode resulting
  696 * from the update. Note that the mode can also increase here (MAY_BORROW to
  697 * CAN_SEND) because we can use a more precise clock than the event queue here.
 698 */
 699static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
 700			     int level, struct sk_buff *skb)
 701{
 702	int bytes = qdisc_pkt_len(skb);
 703	enum htb_cmode old_mode;
 704	s64 diff;
 705
 706	while (cl) {
 707		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 708		if (cl->level >= level) {
 709			if (cl->level == level)
 710				cl->xstats.lends++;
 711			htb_accnt_tokens(cl, bytes, diff);
 712		} else {
 713			cl->xstats.borrows++;
 714			cl->tokens += diff;	/* we moved t_c; update tokens */
 715		}
 716		htb_accnt_ctokens(cl, bytes, diff);
 717		cl->t_c = q->now;
 718
 719		old_mode = cl->cmode;
 720		diff = 0;
 721		htb_change_class_mode(q, cl, &diff);
 722		if (old_mode != cl->cmode) {
 723			if (old_mode != HTB_CAN_SEND)
 724				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
 725			if (cl->cmode != HTB_CAN_SEND)
 726				htb_add_to_wait_tree(q, cl, diff);
 727		}
 728
 729		/* update basic stats except for leaves which are already updated */
 730		if (cl->level)
 731			bstats_update(&cl->bstats, skb);
 732
 733		cl = cl->parent;
 734	}
 735}
 736
 737/**
 738 * htb_do_events - make mode changes to classes at the level
 739 * @q: the priority event queue
 740 * @level: which wait_pq in 'q->hlevel'
 741 * @start: start jiffies
 742 *
 743 * Scans event queue for pending events and applies them. Returns time of
 744 * next pending event (0 for no event in pq, q->now for too many events).
  745 * Note: Only events with cl->pq_key <= q->now are applied.
 746 */
 747static s64 htb_do_events(struct htb_sched *q, const int level,
 748			 unsigned long start)
 749{
 750	/* don't run for longer than 2 jiffies; 2 is used instead of
 751	 * 1 to simplify things when jiffy is going to be incremented
 752	 * too soon
 753	 */
 754	unsigned long stop_at = start + 2;
 755	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
 756
 757	while (time_before(jiffies, stop_at)) {
 758		struct htb_class *cl;
 759		s64 diff;
 760		struct rb_node *p = rb_first(wait_pq);
 761
 762		if (!p)
 763			return 0;
 764
 765		cl = rb_entry(p, struct htb_class, pq_node);
 766		if (cl->pq_key > q->now)
 767			return cl->pq_key;
 768
 769		htb_safe_rb_erase(p, wait_pq);
 770		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
 771		htb_change_class_mode(q, cl, &diff);
 772		if (cl->cmode != HTB_CAN_SEND)
 773			htb_add_to_wait_tree(q, cl, diff);
 774	}
 775
 776	/* too much load - let's continue after a break for scheduling */
 777	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
 778		pr_warn("htb: too many events!\n");
 779		q->warned |= HTB_WARN_TOOMANYEVENTS;
 780	}
 781
 782	return q->now;
 783}
 784
  785/* Returns class->node+prio from the id-tree where the class's id is >= id.
  786 * NULL if no such one exists.
 787 */
 788static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
 789					      u32 id)
 790{
 791	struct rb_node *r = NULL;
 792	while (n) {
 793		struct htb_class *cl =
 794		    rb_entry(n, struct htb_class, node[prio]);
 795
 796		if (id > cl->common.classid) {
 797			n = n->rb_right;
 798		} else if (id < cl->common.classid) {
 799			r = n;
 800			n = n->rb_left;
 801		} else {
 802			return n;
 803		}
 804	}
 805	return r;
 806}
 807
 808/**
 809 * htb_lookup_leaf - returns next leaf class in DRR order
  810 * @hprio: the current htb_prio (the row at this level)
 811 * @prio: which prio in class
 812 *
  813 * Find the leaf where the current feed pointer points to.
 814 */
 815static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
 816{
 817	int i;
 818	struct {
 819		struct rb_node *root;
 820		struct rb_node **pptr;
 821		u32 *pid;
 822	} stk[TC_HTB_MAXDEPTH], *sp = stk;
 823
 824	BUG_ON(!hprio->row.rb_node);
 825	sp->root = hprio->row.rb_node;
 826	sp->pptr = &hprio->ptr;
 827	sp->pid = &hprio->last_ptr_id;
 828
 829	for (i = 0; i < 65535; i++) {
 830		if (!*sp->pptr && *sp->pid) {
 831			/* ptr was invalidated but id is valid - try to recover
 832			 * the original or next ptr
 833			 */
 834			*sp->pptr =
 835			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
 836		}
  837		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
 838				 * can become out of date quickly
 839				 */
 840		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
 841			*sp->pptr = sp->root;
 842			while ((*sp->pptr)->rb_left)
 843				*sp->pptr = (*sp->pptr)->rb_left;
 844			if (sp > stk) {
 845				sp--;
 846				if (!*sp->pptr) {
 847					WARN_ON(1);
 848					return NULL;
 849				}
 850				htb_next_rb_node(sp->pptr);
 851			}
 852		} else {
 853			struct htb_class *cl;
 854			struct htb_prio *clp;
 855
 856			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
 857			if (!cl->level)
 858				return cl;
 859			clp = &cl->inner.clprio[prio];
 860			(++sp)->root = clp->feed.rb_node;
 861			sp->pptr = &clp->ptr;
 862			sp->pid = &clp->last_ptr_id;
 863		}
 864	}
 865	WARN_ON(1);
 866	return NULL;
 867}
 868
 869/* dequeues packet at given priority and level; call only if
  870 * you are sure that there is an active class at prio/level
 871 */
 872static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
 873					const int level)
 874{
 875	struct sk_buff *skb = NULL;
 876	struct htb_class *cl, *start;
 877	struct htb_level *hlevel = &q->hlevel[level];
 878	struct htb_prio *hprio = &hlevel->hprio[prio];
 879
 880	/* look initial class up in the row */
 881	start = cl = htb_lookup_leaf(hprio, prio);
 882
 883	do {
 884next:
 885		if (unlikely(!cl))
 886			return NULL;
 887
 888		/* class can be empty - it is unlikely but can be true if leaf
 889		 * qdisc drops packets in enqueue routine or if someone used
 890		 * graft operation on the leaf since last dequeue;
 891		 * simply deactivate and skip such class
 892		 */
 893		if (unlikely(cl->leaf.q->q.qlen == 0)) {
 894			struct htb_class *next;
 895			htb_deactivate(q, cl);
 896
 897			/* row/level might become empty */
 898			if ((q->row_mask[level] & (1 << prio)) == 0)
 899				return NULL;
 900
 901			next = htb_lookup_leaf(hprio, prio);
 902
 903			if (cl == start)	/* fix start if we just deleted it */
 904				start = next;
 905			cl = next;
 906			goto next;
 907		}
 908
 909		skb = cl->leaf.q->dequeue(cl->leaf.q);
 910		if (likely(skb != NULL))
 911			break;
 912
 913		qdisc_warn_nonwc("htb", cl->leaf.q);
 914		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
 915					 &q->hlevel[0].hprio[prio].ptr);
 916		cl = htb_lookup_leaf(hprio, prio);
 917
 918	} while (cl != start);
 919
 920	if (likely(skb != NULL)) {
 921		bstats_update(&cl->bstats, skb);
 922		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
 923		if (cl->leaf.deficit[level] < 0) {
 924			cl->leaf.deficit[level] += cl->quantum;
 925			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
 926						 &q->hlevel[0].hprio[prio].ptr);
 927		}
  928		/* this used to be after charge_class but this constellation
 929		 * gives us slightly better performance
 930		 */
 931		if (!cl->leaf.q->q.qlen)
 932			htb_deactivate(q, cl);
 933		htb_charge_class(q, cl, level, skb);
 934	}
 935	return skb;
 936}
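
/* Editor's note: the deficit handling above is a deficit round robin between
 * sibling leaves; the quantum below is only an example. With
 * cl->quantum = 1514, once a leaf's deficit at this level drops below zero
 * it is topped up by 1514 bytes and the feed pointer advances, so each leaf
 * sends roughly one quantum's worth of bytes per round.
 */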
 937
 938static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 939{
 940	struct sk_buff *skb;
 941	struct htb_sched *q = qdisc_priv(sch);
 942	int level;
 943	s64 next_event;
 944	unsigned long start_at;
 945
 946	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
 947	skb = __qdisc_dequeue_head(&q->direct_queue);
 948	if (skb != NULL) {
 949ok:
 950		qdisc_bstats_update(sch, skb);
 951		qdisc_qstats_backlog_dec(sch, skb);
 952		sch->q.qlen--;
 953		return skb;
 954	}
 955
 956	if (!sch->q.qlen)
 957		goto fin;
 958	q->now = ktime_get_ns();
 959	start_at = jiffies;
 960
 961	next_event = q->now + 5LLU * NSEC_PER_SEC;
 962
 963	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
 964		/* common case optimization - skip event handler quickly */
 965		int m;
 966		s64 event = q->near_ev_cache[level];
 967
 968		if (q->now >= event) {
 969			event = htb_do_events(q, level, start_at);
 970			if (!event)
 971				event = q->now + NSEC_PER_SEC;
 972			q->near_ev_cache[level] = event;
 973		}
 974
 975		if (next_event > event)
 976			next_event = event;
 977
 978		m = ~q->row_mask[level];
 979		while (m != (int)(-1)) {
 980			int prio = ffz(m);
 981
 982			m |= 1 << prio;
 983			skb = htb_dequeue_tree(q, prio, level);
 984			if (likely(skb != NULL))
 985				goto ok;
 986		}
 987	}
 988	if (likely(next_event > q->now))
 989		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
 990	else
 991		schedule_work(&q->work);
 992fin:
 993	return skb;
 994}
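
/* Editor's note: the scan order above serves level 0 (leaves sending within
 * their own rate) before higher, borrowing levels and, within a level, the
 * numerically lowest active prio first, so an active prio-0 leaf is always
 * served before an active prio-1 leaf of the same level.
 */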
 995
 996/* reset all classes */
  997/* always called under BH & queue lock */
 998static void htb_reset(struct Qdisc *sch)
 999{
1000	struct htb_sched *q = qdisc_priv(sch);
1001	struct htb_class *cl;
1002	unsigned int i;
1003
1004	for (i = 0; i < q->clhash.hashsize; i++) {
1005		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1006			if (cl->level)
1007				memset(&cl->inner, 0, sizeof(cl->inner));
1008			else {
1009				if (cl->leaf.q && !q->offload)
1010					qdisc_reset(cl->leaf.q);
1011			}
1012			cl->prio_activity = 0;
1013			cl->cmode = HTB_CAN_SEND;
1014		}
1015	}
1016	qdisc_watchdog_cancel(&q->watchdog);
1017	__qdisc_reset_queue(&q->direct_queue);
1018	memset(q->hlevel, 0, sizeof(q->hlevel));
1019	memset(q->row_mask, 0, sizeof(q->row_mask));
1020}
1021
1022static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1023	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
1024	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
1025	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1027	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1028	[TCA_HTB_RATE64] = { .type = NLA_U64 },
1029	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
1030	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1031};
1032
1033static void htb_work_func(struct work_struct *work)
1034{
1035	struct htb_sched *q = container_of(work, struct htb_sched, work);
1036	struct Qdisc *sch = q->watchdog.qdisc;
1037
1038	rcu_read_lock();
1039	__netif_schedule(qdisc_root(sch));
1040	rcu_read_unlock();
1041}
1042
1043static void htb_set_lockdep_class_child(struct Qdisc *q)
1044{
1045	static struct lock_class_key child_key;
1046
1047	lockdep_set_class(qdisc_lock(q), &child_key);
1048}
1049
1050static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1051{
1052	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1053}
1054
1055static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1056		    struct netlink_ext_ack *extack)
1057{
1058	struct net_device *dev = qdisc_dev(sch);
1059	struct tc_htb_qopt_offload offload_opt;
1060	struct htb_sched *q = qdisc_priv(sch);
1061	struct nlattr *tb[TCA_HTB_MAX + 1];
1062	struct tc_htb_glob *gopt;
1063	unsigned int ntx;
1064	bool offload;
1065	int err;
1066
1067	qdisc_watchdog_init(&q->watchdog, sch);
1068	INIT_WORK(&q->work, htb_work_func);
1069
1070	if (!opt)
1071		return -EINVAL;
1072
1073	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1074	if (err)
1075		return err;
1076
1077	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1078					  NULL);
1079	if (err < 0)
1080		return err;
1081
1082	if (!tb[TCA_HTB_INIT])
1083		return -EINVAL;
1084
1085	gopt = nla_data(tb[TCA_HTB_INIT]);
1086	if (gopt->version != HTB_VER >> 16)
1087		return -EINVAL;
1088
1089	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1090
1091	if (offload) {
1092		if (sch->parent != TC_H_ROOT) {
1093			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1094			return -EOPNOTSUPP;
1095		}
1096
1097		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1098			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1099			return -EOPNOTSUPP;
1100		}
1101
1102		q->num_direct_qdiscs = dev->real_num_tx_queues;
1103		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1104					   sizeof(*q->direct_qdiscs),
1105					   GFP_KERNEL);
1106		if (!q->direct_qdiscs)
1107			return -ENOMEM;
1108	}
1109
1110	err = qdisc_class_hash_init(&q->clhash);
1111	if (err < 0)
1112		return err;
1113
1114	if (tb[TCA_HTB_DIRECT_QLEN])
1115		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1116	else
1117		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1118
1119	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1120		q->rate2quantum = 1;
1121	q->defcls = gopt->defcls;
1122
1123	if (!offload)
1124		return 0;
1125
1126	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1127		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1128		struct Qdisc *qdisc;
1129
1130		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1131					  TC_H_MAKE(sch->handle, 0), extack);
1132		if (!qdisc) {
1133			return -ENOMEM;
1134		}
1135
1136		htb_set_lockdep_class_child(qdisc);
1137		q->direct_qdiscs[ntx] = qdisc;
1138		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1139	}
1140
1141	sch->flags |= TCQ_F_MQROOT;
1142
1143	offload_opt = (struct tc_htb_qopt_offload) {
1144		.command = TC_HTB_CREATE,
1145		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1146		.classid = TC_H_MIN(q->defcls),
1147		.extack = extack,
1148	};
1149	err = htb_offload(dev, &offload_opt);
1150	if (err)
1151		return err;
1152
1153	/* Defer this assignment, so that htb_destroy skips offload-related
1154	 * parts (especially calling ndo_setup_tc) on errors.
1155	 */
1156	q->offload = true;
1157
1158	return 0;
1159}
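
/* Editor's note: typical commands exercising the two init paths above; the
 * device name, handle and default class are placeholders:
 *
 *	# software HTB
 *	tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *
 *	# offloaded HTB (needs hw-tc-offload and ndo_setup_tc support)
 *	ethtool -K eth0 hw-tc-offload on
 *	tc qdisc add dev eth0 root handle 1: htb offload
 */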
1160
1161static void htb_attach_offload(struct Qdisc *sch)
1162{
1163	struct net_device *dev = qdisc_dev(sch);
1164	struct htb_sched *q = qdisc_priv(sch);
1165	unsigned int ntx;
1166
1167	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1168		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1169
1170		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1171		qdisc_put(old);
1172		qdisc_hash_add(qdisc, false);
1173	}
1174	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1175		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1176		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1177
1178		qdisc_put(old);
1179	}
1180
1181	kfree(q->direct_qdiscs);
1182	q->direct_qdiscs = NULL;
1183}
1184
1185static void htb_attach_software(struct Qdisc *sch)
1186{
1187	struct net_device *dev = qdisc_dev(sch);
1188	unsigned int ntx;
1189
1190	/* Resemble qdisc_graft behavior. */
1191	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1192		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1193		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1194
1195		qdisc_refcount_inc(sch);
1196
1197		qdisc_put(old);
1198	}
1199}
1200
1201static void htb_attach(struct Qdisc *sch)
1202{
1203	struct htb_sched *q = qdisc_priv(sch);
1204
1205	if (q->offload)
1206		htb_attach_offload(sch);
1207	else
1208		htb_attach_software(sch);
1209}
1210
1211static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1212{
1213	struct htb_sched *q = qdisc_priv(sch);
1214	struct nlattr *nest;
1215	struct tc_htb_glob gopt;
1216
1217	if (q->offload)
1218		sch->flags |= TCQ_F_OFFLOADED;
1219	else
1220		sch->flags &= ~TCQ_F_OFFLOADED;
1221
1222	sch->qstats.overlimits = q->overlimits;
 1223	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
 1224	 * no change can happen to the qdisc parameters.
1225	 */
1226
1227	gopt.direct_pkts = q->direct_pkts;
1228	gopt.version = HTB_VER;
1229	gopt.rate2quantum = q->rate2quantum;
1230	gopt.defcls = q->defcls;
1231	gopt.debug = 0;
1232
1233	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1234	if (nest == NULL)
1235		goto nla_put_failure;
1236	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1237	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1238		goto nla_put_failure;
1239	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1240		goto nla_put_failure;
1241
1242	return nla_nest_end(skb, nest);
1243
1244nla_put_failure:
1245	nla_nest_cancel(skb, nest);
1246	return -1;
1247}
1248
1249static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1250			  struct sk_buff *skb, struct tcmsg *tcm)
1251{
1252	struct htb_class *cl = (struct htb_class *)arg;
1253	struct htb_sched *q = qdisc_priv(sch);
1254	struct nlattr *nest;
1255	struct tc_htb_opt opt;
1256
 1257	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
 1258	 * no change can happen to the class parameters.
1259	 */
1260	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1261	tcm->tcm_handle = cl->common.classid;
1262	if (!cl->level && cl->leaf.q)
1263		tcm->tcm_info = cl->leaf.q->handle;
1264
1265	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1266	if (nest == NULL)
1267		goto nla_put_failure;
1268
1269	memset(&opt, 0, sizeof(opt));
1270
1271	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1272	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1273	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1274	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1275	opt.quantum = cl->quantum;
1276	opt.prio = cl->prio;
1277	opt.level = cl->level;
1278	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1279		goto nla_put_failure;
1280	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1281		goto nla_put_failure;
1282	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1283	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1284			      TCA_HTB_PAD))
1285		goto nla_put_failure;
1286	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1287	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1288			      TCA_HTB_PAD))
1289		goto nla_put_failure;
1290
1291	return nla_nest_end(skb, nest);
1292
1293nla_put_failure:
1294	nla_nest_cancel(skb, nest);
1295	return -1;
1296}
1297
1298static void htb_offload_aggregate_stats(struct htb_sched *q,
1299					struct htb_class *cl)
1300{
1301	u64 bytes = 0, packets = 0;
1302	struct htb_class *c;
1303	unsigned int i;
1304
1305	gnet_stats_basic_sync_init(&cl->bstats);
1306
1307	for (i = 0; i < q->clhash.hashsize; i++) {
1308		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1309			struct htb_class *p = c;
1310
1311			while (p && p->level < cl->level)
1312				p = p->parent;
1313
1314			if (p != cl)
1315				continue;
1316
1317			bytes += u64_stats_read(&c->bstats_bias.bytes);
1318			packets += u64_stats_read(&c->bstats_bias.packets);
1319			if (c->level == 0) {
1320				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1321				packets += u64_stats_read(&c->leaf.q->bstats.packets);
1322			}
1323		}
1324	}
1325	_bstats_update(&cl->bstats, bytes, packets);
1326}
1327
1328static int
1329htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1330{
1331	struct htb_class *cl = (struct htb_class *)arg;
1332	struct htb_sched *q = qdisc_priv(sch);
1333	struct gnet_stats_queue qs = {
1334		.drops = cl->drops,
1335		.overlimits = cl->overlimits,
1336	};
1337	__u32 qlen = 0;
1338
1339	if (!cl->level && cl->leaf.q)
1340		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1341
1342	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1343				    INT_MIN, INT_MAX);
1344	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1345				     INT_MIN, INT_MAX);
1346
1347	if (q->offload) {
1348		if (!cl->level) {
1349			if (cl->leaf.q)
1350				cl->bstats = cl->leaf.q->bstats;
1351			else
1352				gnet_stats_basic_sync_init(&cl->bstats);
1353			_bstats_update(&cl->bstats,
1354				       u64_stats_read(&cl->bstats_bias.bytes),
1355				       u64_stats_read(&cl->bstats_bias.packets));
1356		} else {
1357			htb_offload_aggregate_stats(q, cl);
1358		}
1359	}
1360
1361	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1362	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1363	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1364		return -1;
1365
1366	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1367}
1368
1369static struct netdev_queue *
1370htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1371{
1372	struct net_device *dev = qdisc_dev(sch);
1373	struct tc_htb_qopt_offload offload_opt;
1374	struct htb_sched *q = qdisc_priv(sch);
1375	int err;
1376
1377	if (!q->offload)
1378		return sch->dev_queue;
1379
1380	offload_opt = (struct tc_htb_qopt_offload) {
1381		.command = TC_HTB_LEAF_QUERY_QUEUE,
1382		.classid = TC_H_MIN(tcm->tcm_parent),
1383	};
1384	err = htb_offload(dev, &offload_opt);
1385	if (err || offload_opt.qid >= dev->num_tx_queues)
1386		return NULL;
1387	return netdev_get_tx_queue(dev, offload_opt.qid);
1388}
1389
1390static struct Qdisc *
1391htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1392{
1393	struct net_device *dev = dev_queue->dev;
1394	struct Qdisc *old_q;
1395
1396	if (dev->flags & IFF_UP)
1397		dev_deactivate(dev);
1398	old_q = dev_graft_qdisc(dev_queue, new_q);
1399	if (new_q)
1400		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1401	if (dev->flags & IFF_UP)
1402		dev_activate(dev);
1403
1404	return old_q;
1405}
1406
1407static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1408{
1409	struct netdev_queue *queue;
1410
1411	queue = cl->leaf.offload_queue;
1412	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1413		WARN_ON(cl->leaf.q->dev_queue != queue);
1414
1415	return queue;
1416}
1417
1418static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1419				   struct htb_class *cl_new, bool destroying)
1420{
1421	struct netdev_queue *queue_old, *queue_new;
1422	struct net_device *dev = qdisc_dev(sch);
1423
1424	queue_old = htb_offload_get_queue(cl_old);
1425	queue_new = htb_offload_get_queue(cl_new);
1426
1427	if (!destroying) {
1428		struct Qdisc *qdisc;
1429
1430		if (dev->flags & IFF_UP)
1431			dev_deactivate(dev);
1432		qdisc = dev_graft_qdisc(queue_old, NULL);
1433		WARN_ON(qdisc != cl_old->leaf.q);
1434	}
1435
1436	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1437		cl_old->leaf.q->dev_queue = queue_new;
1438	cl_old->leaf.offload_queue = queue_new;
1439
1440	if (!destroying) {
1441		struct Qdisc *qdisc;
1442
1443		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1444		if (dev->flags & IFF_UP)
1445			dev_activate(dev);
1446		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1447	}
1448}
1449
1450static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1451		     struct Qdisc **old, struct netlink_ext_ack *extack)
1452{
1453	struct netdev_queue *dev_queue = sch->dev_queue;
1454	struct htb_class *cl = (struct htb_class *)arg;
1455	struct htb_sched *q = qdisc_priv(sch);
1456	struct Qdisc *old_q;
1457
1458	if (cl->level)
1459		return -EINVAL;
1460
1461	if (q->offload)
1462		dev_queue = htb_offload_get_queue(cl);
1463
1464	if (!new) {
1465		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1466					cl->common.classid, extack);
1467		if (!new)
1468			return -ENOBUFS;
1469	}
1470
1471	if (q->offload) {
1472		htb_set_lockdep_class_child(new);
1473		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1474		qdisc_refcount_inc(new);
1475		old_q = htb_graft_helper(dev_queue, new);
1476	}
1477
1478	*old = qdisc_replace(sch, new, &cl->leaf.q);
1479
1480	if (q->offload) {
1481		WARN_ON(old_q != *old);
1482		qdisc_put(old_q);
1483	}
1484
1485	return 0;
1486}
1487
1488static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1489{
1490	struct htb_class *cl = (struct htb_class *)arg;
1491	return !cl->level ? cl->leaf.q : NULL;
1492}
1493
1494static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1495{
1496	struct htb_class *cl = (struct htb_class *)arg;
1497
1498	htb_deactivate(qdisc_priv(sch), cl);
1499}
1500
1501static inline int htb_parent_last_child(struct htb_class *cl)
1502{
1503	if (!cl->parent)
1504		/* the root class */
1505		return 0;
1506	if (cl->parent->children > 1)
1507		/* not the last child */
1508		return 0;
1509	return 1;
1510}
1511
1512static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1513			       struct Qdisc *new_q)
1514{
1515	struct htb_sched *q = qdisc_priv(sch);
1516	struct htb_class *parent = cl->parent;
1517
1518	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1519
1520	if (parent->cmode != HTB_CAN_SEND)
1521		htb_safe_rb_erase(&parent->pq_node,
1522				  &q->hlevel[parent->level].wait_pq);
1523
1524	parent->level = 0;
1525	memset(&parent->inner, 0, sizeof(parent->inner));
1526	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1527	parent->tokens = parent->buffer;
1528	parent->ctokens = parent->cbuffer;
1529	parent->t_c = ktime_get_ns();
1530	parent->cmode = HTB_CAN_SEND;
1531	if (q->offload)
1532		parent->leaf.offload_queue = cl->leaf.offload_queue;
1533}
1534
1535static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1536				       struct netdev_queue *dev_queue,
1537				       struct Qdisc *new_q)
1538{
1539	struct Qdisc *old_q;
1540
1541	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1542	if (new_q)
1543		qdisc_refcount_inc(new_q);
1544	old_q = htb_graft_helper(dev_queue, new_q);
1545	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1546}
1547
1548static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1549				     bool last_child, bool destroying,
1550				     struct netlink_ext_ack *extack)
1551{
1552	struct tc_htb_qopt_offload offload_opt;
1553	struct netdev_queue *dev_queue;
1554	struct Qdisc *q = cl->leaf.q;
1555	struct Qdisc *old;
1556	int err;
1557
1558	if (cl->level)
1559		return -EINVAL;
1560
1561	WARN_ON(!q);
1562	dev_queue = htb_offload_get_queue(cl);
1563	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1564	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1565	 * does not need to graft or qdisc_put the qdisc being destroyed.
1566	 */
1567	if (!destroying) {
1568		old = htb_graft_helper(dev_queue, NULL);
1569		/* Last qdisc grafted should be the same as cl->leaf.q when
1570		 * calling htb_delete.
1571		 */
1572		WARN_ON(old != q);
1573	}
1574
1575	if (cl->parent) {
1576		_bstats_update(&cl->parent->bstats_bias,
1577			       u64_stats_read(&q->bstats.bytes),
1578			       u64_stats_read(&q->bstats.packets));
1579	}
1580
1581	offload_opt = (struct tc_htb_qopt_offload) {
1582		.command = !last_child ? TC_HTB_LEAF_DEL :
1583			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1584			   TC_HTB_LEAF_DEL_LAST,
1585		.classid = cl->common.classid,
1586		.extack = extack,
1587	};
1588	err = htb_offload(qdisc_dev(sch), &offload_opt);
1589
1590	if (!destroying) {
1591		if (!err)
1592			qdisc_put(old);
1593		else
1594			htb_graft_helper(dev_queue, old);
1595	}
1596
1597	if (last_child)
1598		return err;
1599
1600	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1601		u32 classid = TC_H_MAJ(sch->handle) |
1602			      TC_H_MIN(offload_opt.classid);
1603		struct htb_class *moved_cl = htb_find(classid, sch);
1604
1605		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1606	}
1607
1608	return err;
1609}
1610
1611static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1612{
1613	if (!cl->level) {
1614		WARN_ON(!cl->leaf.q);
1615		qdisc_put(cl->leaf.q);
1616	}
1617	gen_kill_estimator(&cl->rate_est);
1618	tcf_block_put(cl->block);
1619	kfree(cl);
1620}
1621
1622static void htb_destroy(struct Qdisc *sch)
1623{
1624	struct net_device *dev = qdisc_dev(sch);
1625	struct tc_htb_qopt_offload offload_opt;
1626	struct htb_sched *q = qdisc_priv(sch);
1627	struct hlist_node *next;
1628	bool nonempty, changed;
1629	struct htb_class *cl;
1630	unsigned int i;
1631
1632	cancel_work_sync(&q->work);
1633	qdisc_watchdog_cancel(&q->watchdog);
1634	/* This line used to be after htb_destroy_class call below
1635	 * and surprisingly it worked in 2.4. But it must precede it
 1636	 * because filters need their target class alive to be able to call
1637	 * unbind_filter on it (without Oops).
1638	 */
1639	tcf_block_put(q->block);
1640
1641	for (i = 0; i < q->clhash.hashsize; i++) {
1642		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1643			tcf_block_put(cl->block);
1644			cl->block = NULL;
1645		}
1646	}
1647
1648	do {
1649		nonempty = false;
1650		changed = false;
1651		for (i = 0; i < q->clhash.hashsize; i++) {
1652			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1653						  common.hnode) {
1654				bool last_child;
1655
1656				if (!q->offload) {
1657					htb_destroy_class(sch, cl);
1658					continue;
1659				}
1660
1661				nonempty = true;
1662
1663				if (cl->level)
1664					continue;
1665
1666				changed = true;
1667
1668				last_child = htb_parent_last_child(cl);
1669				htb_destroy_class_offload(sch, cl, last_child,
1670							  true, NULL);
1671				qdisc_class_hash_remove(&q->clhash,
1672							&cl->common);
1673				if (cl->parent)
1674					cl->parent->children--;
1675				if (last_child)
1676					htb_parent_to_leaf(sch, cl, NULL);
1677				htb_destroy_class(sch, cl);
1678			}
1679		}
1680	} while (changed);
1681	WARN_ON(nonempty);
1682
1683	qdisc_class_hash_destroy(&q->clhash);
1684	__qdisc_reset_queue(&q->direct_queue);
1685
1686	if (q->offload) {
1687		offload_opt = (struct tc_htb_qopt_offload) {
1688			.command = TC_HTB_DESTROY,
1689		};
1690		htb_offload(dev, &offload_opt);
1691	}
1692
1693	if (!q->direct_qdiscs)
1694		return;
1695	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1696		qdisc_put(q->direct_qdiscs[i]);
1697	kfree(q->direct_qdiscs);
1698}
1699
1700static int htb_delete(struct Qdisc *sch, unsigned long arg,
1701		      struct netlink_ext_ack *extack)
1702{
1703	struct htb_sched *q = qdisc_priv(sch);
1704	struct htb_class *cl = (struct htb_class *)arg;
1705	struct Qdisc *new_q = NULL;
1706	int last_child = 0;
1707	int err;
1708
 1709	/* TODO: why don't we allow deleting a subtree? references? does
 1710	 * the tc subsys guarantee us that in htb_destroy it holds no class
 1711	 * refs, so that we can remove children safely there?
1712	 */
1713	if (cl->children || cl->filter_cnt)
1714		return -EBUSY;
1715
1716	if (!cl->level && htb_parent_last_child(cl))
1717		last_child = 1;
1718
1719	if (q->offload) {
1720		err = htb_destroy_class_offload(sch, cl, last_child, false,
1721						extack);
1722		if (err)
1723			return err;
1724	}
1725
1726	if (last_child) {
1727		struct netdev_queue *dev_queue = sch->dev_queue;
1728
1729		if (q->offload)
1730			dev_queue = htb_offload_get_queue(cl);
1731
1732		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1733					  cl->parent->common.classid,
1734					  NULL);
1735		if (q->offload) {
1736			if (new_q)
1737				htb_set_lockdep_class_child(new_q);
1738			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1739		}
1740	}
1741
1742	sch_tree_lock(sch);
1743
1744	if (!cl->level)
1745		qdisc_purge_queue(cl->leaf.q);
1746
1747	/* delete from hash and active; remainder in destroy_class */
1748	qdisc_class_hash_remove(&q->clhash, &cl->common);
1749	if (cl->parent)
1750		cl->parent->children--;
1751
1752	if (cl->prio_activity)
1753		htb_deactivate(q, cl);
1754
1755	if (cl->cmode != HTB_CAN_SEND)
1756		htb_safe_rb_erase(&cl->pq_node,
1757				  &q->hlevel[cl->level].wait_pq);
1758
1759	if (last_child)
1760		htb_parent_to_leaf(sch, cl, new_q);
1761
1762	sch_tree_unlock(sch);
1763
1764	htb_destroy_class(sch, cl);
1765	return 0;
1766}
1767
1768static int htb_change_class(struct Qdisc *sch, u32 classid,
1769			    u32 parentid, struct nlattr **tca,
1770			    unsigned long *arg, struct netlink_ext_ack *extack)
1771{
1772	int err = -EINVAL;
1773	struct htb_sched *q = qdisc_priv(sch);
1774	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1775	struct tc_htb_qopt_offload offload_opt;
1776	struct nlattr *opt = tca[TCA_OPTIONS];
1777	struct nlattr *tb[TCA_HTB_MAX + 1];
1778	struct Qdisc *parent_qdisc = NULL;
1779	struct netdev_queue *dev_queue;
1780	struct tc_htb_opt *hopt;
1781	u64 rate64, ceil64;
1782	int warn = 0;
1783
1784	/* extract all subattrs from opt attr */
1785	if (!opt)
1786		goto failure;
1787
1788	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1789					  NULL);
1790	if (err < 0)
1791		goto failure;
1792
1793	err = -EINVAL;
1794	if (tb[TCA_HTB_PARMS] == NULL)
1795		goto failure;
1796
1797	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1798
1799	hopt = nla_data(tb[TCA_HTB_PARMS]);
1800	if (!hopt->rate.rate || !hopt->ceil.rate)
1801		goto failure;
1802
1803	if (q->offload) {
1804		/* Options not supported by the offload. */
1805		if (hopt->rate.overhead || hopt->ceil.overhead) {
1806			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1807			goto failure;
1808		}
1809		if (hopt->rate.mpu || hopt->ceil.mpu) {
1810			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1811			goto failure;
1812		}
1813		if (hopt->quantum) {
1814			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
1815			goto failure;
1816		}
1817		if (hopt->prio) {
1818			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
1819			goto failure;
1820		}
1821	}
1822
 1823	/* Keeping backward compatibility with rate_table based iproute2 tc */
1824	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1825		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1826					      NULL));
1827
1828	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1829		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1830					      NULL));
1831
1832	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1833	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1834
1835	if (!cl) {		/* new class */
1836		struct net_device *dev = qdisc_dev(sch);
1837		struct Qdisc *new_q, *old_q;
1838		int prio;
1839		struct {
1840			struct nlattr		nla;
1841			struct gnet_estimator	opt;
1842		} est = {
1843			.nla = {
1844				.nla_len	= nla_attr_size(sizeof(est.opt)),
1845				.nla_type	= TCA_RATE,
1846			},
1847			.opt = {
1848				/* 4s interval, 16s averaging constant */
1849				.interval	= 2,
1850				.ewma_log	= 2,
1851			},
1852		};
1853
1854		/* check for valid classid */
1855		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1856		    htb_find(classid, sch))
1857			goto failure;
1858
1859		/* check maximal depth */
1860		if (parent && parent->parent && parent->parent->level < 2) {
1861			pr_err("htb: tree is too deep\n");
1862			goto failure;
1863		}
1864		err = -ENOBUFS;
1865		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1866		if (!cl)
1867			goto failure;
1868
1869		gnet_stats_basic_sync_init(&cl->bstats);
1870		gnet_stats_basic_sync_init(&cl->bstats_bias);
1871
1872		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1873		if (err) {
1874			kfree(cl);
1875			goto failure;
1876		}
1877		if (htb_rate_est || tca[TCA_RATE]) {
1878			err = gen_new_estimator(&cl->bstats, NULL,
1879						&cl->rate_est,
1880						NULL,
1881						true,
1882						tca[TCA_RATE] ? : &est.nla);
1883			if (err)
1884				goto err_block_put;
1885		}
1886
1887		cl->children = 0;
1888		RB_CLEAR_NODE(&cl->pq_node);
1889
1890		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1891			RB_CLEAR_NODE(&cl->node[prio]);
1892
1893		cl->common.classid = classid;
1894
 1895		/* Make sure nothing interrupts us in between two
1896		 * ndo_setup_tc calls.
1897		 */
1898		ASSERT_RTNL();
1899
 1900		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL),
 1901		 * which can't be used inside of sch_tree_lock
1902		 * -- thanks to Karlis Peisenieks
1903		 */
1904		if (!q->offload) {
1905			dev_queue = sch->dev_queue;
1906		} else if (!(parent && !parent->level)) {
1907			/* Assign a dev_queue to this classid. */
1908			offload_opt = (struct tc_htb_qopt_offload) {
1909				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1910				.classid = cl->common.classid,
1911				.parent_classid = parent ?
1912					TC_H_MIN(parent->common.classid) :
1913					TC_HTB_CLASSID_ROOT,
1914				.rate = max_t(u64, hopt->rate.rate, rate64),
1915				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1916				.extack = extack,
1917			};
1918			err = htb_offload(dev, &offload_opt);
1919			if (err) {
1920				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
1921				       err);
1922				goto err_kill_estimator;
1923			}
1924			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1925		} else { /* First child. */
1926			dev_queue = htb_offload_get_queue(parent);
1927			old_q = htb_graft_helper(dev_queue, NULL);
1928			WARN_ON(old_q != parent->leaf.q);
1929			offload_opt = (struct tc_htb_qopt_offload) {
1930				.command = TC_HTB_LEAF_TO_INNER,
1931				.classid = cl->common.classid,
1932				.parent_classid =
1933					TC_H_MIN(parent->common.classid),
1934				.rate = max_t(u64, hopt->rate.rate, rate64),
1935				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1936				.extack = extack,
1937			};
1938			err = htb_offload(dev, &offload_opt);
1939			if (err) {
1940				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
1941				       err);
1942				htb_graft_helper(dev_queue, old_q);
1943				goto err_kill_estimator;
1944			}
1945			_bstats_update(&parent->bstats_bias,
1946				       u64_stats_read(&old_q->bstats.bytes),
1947				       u64_stats_read(&old_q->bstats.packets));
1948			qdisc_put(old_q);
1949		}
1950		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1951					  classid, NULL);
1952		if (q->offload) {
1953			if (new_q) {
1954				htb_set_lockdep_class_child(new_q);
1955				/* One ref for cl->leaf.q, the other for
1956				 * dev_queue->qdisc.
1957				 */
1958				qdisc_refcount_inc(new_q);
1959			}
1960			old_q = htb_graft_helper(dev_queue, new_q);
1961			/* No qdisc_put needed. */
1962			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1963		}
1964		sch_tree_lock(sch);
1965		if (parent && !parent->level) {
1966			/* turn parent into inner node */
1967			qdisc_purge_queue(parent->leaf.q);
1968			parent_qdisc = parent->leaf.q;
1969			if (parent->prio_activity)
1970				htb_deactivate(q, parent);
1971
1972			/* remove from evt list because of level change */
1973			if (parent->cmode != HTB_CAN_SEND) {
1974				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1975				parent->cmode = HTB_CAN_SEND;
1976			}
1977			parent->level = (parent->parent ? parent->parent->level
1978					 : TC_HTB_MAXDEPTH) - 1;
1979			memset(&parent->inner, 0, sizeof(parent->inner));
1980		}
1981
1982		/* leaf (we) needs elementary qdisc */
1983		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1984		if (q->offload)
1985			cl->leaf.offload_queue = dev_queue;
1986
1987		cl->parent = parent;
1988
1989		/* set class to be in HTB_CAN_SEND state */
1990		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1991		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1992		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1993		cl->t_c = ktime_get_ns();
1994		cl->cmode = HTB_CAN_SEND;
1995
1996		/* attach to the hash list and parent's family */
1997		qdisc_class_hash_insert(&q->clhash, &cl->common);
1998		if (parent)
1999			parent->children++;
2000		if (cl->leaf.q != &noop_qdisc)
2001			qdisc_hash_add(cl->leaf.q, true);
2002	} else {
2003		if (tca[TCA_RATE]) {
2004			err = gen_replace_estimator(&cl->bstats, NULL,
2005						    &cl->rate_est,
2006						    NULL,
2007						    true,
2008						    tca[TCA_RATE]);
2009			if (err)
2010				return err;
2011		}
2012
2013		if (q->offload) {
2014			struct net_device *dev = qdisc_dev(sch);
2015
2016			offload_opt = (struct tc_htb_qopt_offload) {
2017				.command = TC_HTB_NODE_MODIFY,
2018				.classid = cl->common.classid,
2019				.rate = max_t(u64, hopt->rate.rate, rate64),
2020				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
2021				.extack = extack,
2022			};
2023			err = htb_offload(dev, &offload_opt);
2024			if (err)
2025				/* Estimator was replaced, and rollback may fail
2026				 * as well, so we don't try to recover it, and
 2027				 * the estimator won't work properly with the
2028				 * offload anyway, because bstats are updated
2029				 * only when the stats are queried.
2030				 */
2031				return err;
2032		}
2033
2034		sch_tree_lock(sch);
2035	}
2036
2037	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2038	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2039
 2040	/* there used to be a nasty bug here: we have to check that the node
 2041	 * is really a leaf before changing cl->leaf!
2042	 */
2043	if (!cl->level) {
2044		u64 quantum = cl->rate.rate_bytes_ps;
2045
2046		do_div(quantum, q->rate2quantum);
2047		cl->quantum = min_t(u64, quantum, INT_MAX);
2048
2049		if (!hopt->quantum && cl->quantum < 1000) {
2050			warn = -1;
2051			cl->quantum = 1000;
2052		}
2053		if (!hopt->quantum && cl->quantum > 200000) {
2054			warn = 1;
2055			cl->quantum = 200000;
2056		}
2057		if (hopt->quantum)
2058			cl->quantum = hopt->quantum;
2059		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2060			cl->prio = TC_HTB_NUMPRIO - 1;
2061	}
2062
2063	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2064	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2065
2066	sch_tree_unlock(sch);
2067	qdisc_put(parent_qdisc);
2068
2069	if (warn)
2070		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
2071			    cl->common.classid, (warn == -1 ? "small" : "big"));
2072
2073	qdisc_class_hash_grow(sch, &q->clhash);
2074
2075	*arg = (unsigned long)cl;
2076	return 0;
2077
2078err_kill_estimator:
2079	gen_kill_estimator(&cl->rate_est);
2080err_block_put:
2081	tcf_block_put(cl->block);
2082	kfree(cl);
2083failure:
2084	return err;
2085}
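
/* Editor's note: the class setup path above corresponds to commands such as
 * the following; classids, rates and burst are placeholders:
 *
 *	tc class add dev eth0 parent 1:  classid 1:1  htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:10 htb rate 30mbit \
 *		ceil 100mbit burst 15k
 *
 * Adding 1:10 turns 1:1 from a leaf into an inner node (the "turn parent
 * into inner node" branch above), and rate/ceil end up in cl->rate/cl->ceil
 * via psched_ratecfg_precompute().
 */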
2086
2087static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2088				       struct netlink_ext_ack *extack)
2089{
2090	struct htb_sched *q = qdisc_priv(sch);
2091	struct htb_class *cl = (struct htb_class *)arg;
2092
2093	return cl ? cl->block : q->block;
2094}
2095
2096static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2097				     u32 classid)
2098{
2099	struct htb_class *cl = htb_find(classid, sch);
2100
2101	/*if (cl && !cl->level) return 0;
2102	 * The line above used to be there to prevent attaching filters to
 2103	 * leaves. But at least the tc_index filter uses this just to get the class
 2104	 * for other reasons, so we have to allow for it.
2105	 * ----
2106	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2107	 * another way to "lock" the class - unlike "get" this lock can
2108	 * be broken by class during destroy IIUC.
2109	 */
2110	if (cl)
2111		cl->filter_cnt++;
2112	return (unsigned long)cl;
2113}
2114
2115static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2116{
2117	struct htb_class *cl = (struct htb_class *)arg;
2118
2119	if (cl)
2120		cl->filter_cnt--;
2121}
2122
2123static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2124{
2125	struct htb_sched *q = qdisc_priv(sch);
2126	struct htb_class *cl;
2127	unsigned int i;
2128
2129	if (arg->stop)
2130		return;
2131
2132	for (i = 0; i < q->clhash.hashsize; i++) {
2133		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2134			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2135				return;
2136		}
2137	}
2138}
2139
2140static const struct Qdisc_class_ops htb_class_ops = {
2141	.select_queue	=	htb_select_queue,
2142	.graft		=	htb_graft,
2143	.leaf		=	htb_leaf,
2144	.qlen_notify	=	htb_qlen_notify,
2145	.find		=	htb_search,
2146	.change		=	htb_change_class,
2147	.delete		=	htb_delete,
2148	.walk		=	htb_walk,
2149	.tcf_block	=	htb_tcf_block,
2150	.bind_tcf	=	htb_bind_filter,
2151	.unbind_tcf	=	htb_unbind_filter,
2152	.dump		=	htb_dump_class,
2153	.dump_stats	=	htb_dump_class_stats,
2154};
2155
2156static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2157	.cl_ops		=	&htb_class_ops,
2158	.id		=	"htb",
2159	.priv_size	=	sizeof(struct htb_sched),
2160	.enqueue	=	htb_enqueue,
2161	.dequeue	=	htb_dequeue,
2162	.peek		=	qdisc_peek_dequeued,
2163	.init		=	htb_init,
2164	.attach		=	htb_attach,
2165	.reset		=	htb_reset,
2166	.destroy	=	htb_destroy,
2167	.dump		=	htb_dump,
2168	.owner		=	THIS_MODULE,
2169};
2170
2171static int __init htb_module_init(void)
2172{
2173	return register_qdisc(&htb_qdisc_ops);
2174}
2175static void __exit htb_module_exit(void)
2176{
2177	unregister_qdisc(&htb_qdisc_ops);
2178}
2179
2180module_init(htb_module_init)
2181module_exit(htb_module_exit)
2182MODULE_LICENSE("GPL");