   1// SPDX-License-Identifier: GPL-2.0
   2
   3/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
   4 *
   5 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
   6 *
   7 */
   8
   9#include <linux/ethtool.h>
  10#include <linux/ethtool_netlink.h>
  11#include <linux/types.h>
  12#include <linux/slab.h>
  13#include <linux/kernel.h>
  14#include <linux/string.h>
  15#include <linux/list.h>
  16#include <linux/errno.h>
  17#include <linux/skbuff.h>
  18#include <linux/math64.h>
  19#include <linux/module.h>
  20#include <linux/spinlock.h>
  21#include <linux/rcupdate.h>
  22#include <linux/time.h>
  23#include <net/gso.h>
  24#include <net/netlink.h>
  25#include <net/pkt_sched.h>
  26#include <net/pkt_cls.h>
  27#include <net/sch_generic.h>
  28#include <net/sock.h>
  29#include <net/tcp.h>
  30
  31#define TAPRIO_STAT_NOT_SET	(~0ULL)
  32
  33#include "sch_mqprio_lib.h"
  34
  35static LIST_HEAD(taprio_list);
  36static struct static_key_false taprio_have_broken_mqprio;
  37static struct static_key_false taprio_have_working_mqprio;
  38
  39#define TAPRIO_ALL_GATES_OPEN -1
  40
  41#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
  42#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
  43#define TAPRIO_FLAGS_INVALID U32_MAX
  44
  45struct sched_entry {
  46	/* Durations between this GCL entry and the GCL entry where the
  47	 * respective traffic class gate closes
  48	 */
  49	u64 gate_duration[TC_MAX_QUEUE];
  50	atomic_t budget[TC_MAX_QUEUE];
  51	/* The qdisc makes some effort so that no packet leaves
  52	 * after this time
  53	 */
  54	ktime_t gate_close_time[TC_MAX_QUEUE];
  55	struct list_head list;
  56	/* Used to calculate when to advance the schedule */
  57	ktime_t end_time;
  58	ktime_t next_txtime;
  59	int index;
  60	u32 gate_mask;
  61	u32 interval;
  62	u8 command;
  63};
  64
  65struct sched_gate_list {
  66	/* Longest non-zero contiguous gate durations per traffic class,
  67	 * or 0 if a traffic class gate never opens during the schedule.
  68	 */
  69	u64 max_open_gate_duration[TC_MAX_QUEUE];
  70	u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
  71	u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
  72	struct rcu_head rcu;
  73	struct list_head entries;
  74	size_t num_entries;
  75	ktime_t cycle_end_time;
  76	s64 cycle_time;
  77	s64 cycle_time_extension;
  78	s64 base_time;
  79};
  80
  81struct taprio_sched {
  82	struct Qdisc **qdiscs;
  83	struct Qdisc *root;
  84	u32 flags;
  85	enum tk_offsets tk_offset;
  86	int clockid;
  87	bool offloaded;
  88	bool detected_mqprio;
  89	bool broken_mqprio;
  90	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
  91				    * speeds it's sub-nanoseconds per byte
  92				    */
  93
  94	/* Protects the update side of the RCU protected current_entry */
  95	spinlock_t current_entry_lock;
  96	struct sched_entry __rcu *current_entry;
  97	struct sched_gate_list __rcu *oper_sched;
  98	struct sched_gate_list __rcu *admin_sched;
  99	struct hrtimer advance_timer;
 100	struct list_head taprio_list;
 101	int cur_txq[TC_MAX_QUEUE];
 102	u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
 103	u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
 104	u32 txtime_delay;
 105};
 106
 107struct __tc_taprio_qopt_offload {
 108	refcount_t users;
 109	struct tc_taprio_qopt_offload offload;
 110};
 111
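    /* Worked example for the function below: with a 3-entry schedule of
     * intervals 100/200/300 us where TC0's gate is open in entries 0 and 1
     * but closed in entry 2, entry 0 ends up with gate_duration[0] = 300 us,
     * entry 1 with 200 us and entry 2 with 0. A gate that never closes
     * accumulates the full cycle time.
     */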
 112static void taprio_calculate_gate_durations(struct taprio_sched *q,
 113					    struct sched_gate_list *sched)
 114{
 115	struct net_device *dev = qdisc_dev(q->root);
 116	int num_tc = netdev_get_num_tc(dev);
 117	struct sched_entry *entry, *cur;
 118	int tc;
 119
 120	list_for_each_entry(entry, &sched->entries, list) {
 121		u32 gates_still_open = entry->gate_mask;
 122
 123		/* For each traffic class, calculate each open gate duration,
 124		 * starting at this schedule entry and ending at the schedule
 125		 * entry containing a gate close event for that TC.
 126		 */
 127		cur = entry;
 128
 129		do {
 130			if (!gates_still_open)
 131				break;
 132
 133			for (tc = 0; tc < num_tc; tc++) {
 134				if (!(gates_still_open & BIT(tc)))
 135					continue;
 136
 137				if (cur->gate_mask & BIT(tc))
 138					entry->gate_duration[tc] += cur->interval;
 139				else
 140					gates_still_open &= ~BIT(tc);
 141			}
 142
 143			cur = list_next_entry_circular(cur, &sched->entries, list);
 144		} while (cur != entry);
 145
 146		/* Keep track of the maximum gate duration for each traffic
 147		 * class, taking care to not confuse a traffic class which is
 148		 * temporarily closed with one that is always closed.
 149		 */
 150		for (tc = 0; tc < num_tc; tc++)
 151			if (entry->gate_duration[tc] &&
 152			    sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
 153				sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
 154	}
 155}
 156
 157static bool taprio_entry_allows_tx(ktime_t skb_end_time,
 158				   struct sched_entry *entry, int tc)
 159{
 160	return ktime_before(skb_end_time, entry->gate_close_time[tc]);
 161}
 162
 163static ktime_t sched_base_time(const struct sched_gate_list *sched)
 164{
 165	if (!sched)
 166		return KTIME_MAX;
 167
 168	return ns_to_ktime(sched->base_time);
 169}
 170
 171static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
 172{
 173	/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
 174	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
 175
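    	/* TK_OFFS_MAX is used here as a sentinel meaning "no conversion":
    	 * taprio_parse_clockid() selects it for CLOCK_MONOTONIC, which is
    	 * already the time base of ktime_get().
    	 */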
 176	switch (tk_offset) {
 177	case TK_OFFS_MAX:
 178		return mono;
 179	default:
 180		return ktime_mono_to_any(mono, tk_offset);
 181	}
 182}
 183
 184static ktime_t taprio_get_time(const struct taprio_sched *q)
 185{
 186	return taprio_mono_to_any(q, ktime_get());
 187}
 188
 189static void taprio_free_sched_cb(struct rcu_head *head)
 190{
 191	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
 192	struct sched_entry *entry, *n;
 193
 194	list_for_each_entry_safe(entry, n, &sched->entries, list) {
 195		list_del(&entry->list);
 196		kfree(entry);
 197	}
 198
 199	kfree(sched);
 200}
 201
 202static void switch_schedules(struct taprio_sched *q,
 203			     struct sched_gate_list **admin,
 204			     struct sched_gate_list **oper)
 205{
 206	rcu_assign_pointer(q->oper_sched, *admin);
 207	rcu_assign_pointer(q->admin_sched, NULL);
 208
 209	if (*oper)
 210		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
 211
 212	*oper = *admin;
 213	*admin = NULL;
 214}
 215
 216/* Get how much time has already elapsed in the current cycle. */
 217static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
 218{
 219	ktime_t time_since_sched_start;
 220	s32 time_elapsed;
 221
 222	time_since_sched_start = ktime_sub(time, sched->base_time);
 223	div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
 224
 225	return time_elapsed;
 226}
 227
 228static ktime_t get_interval_end_time(struct sched_gate_list *sched,
 229				     struct sched_gate_list *admin,
 230				     struct sched_entry *entry,
 231				     ktime_t intv_start)
 232{
 233	s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
 234	ktime_t intv_end, cycle_ext_end, cycle_end;
 235
 236	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
 237	intv_end = ktime_add_ns(intv_start, entry->interval);
 238	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
 239
 240	if (ktime_before(intv_end, cycle_end))
 241		return intv_end;
 242	else if (admin && admin != sched &&
 243		 ktime_after(admin->base_time, cycle_end) &&
 244		 ktime_before(admin->base_time, cycle_ext_end))
 245		return admin->base_time;
 246	else
 247		return cycle_end;
 248}
 249
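    /* Convert between a frame length in bytes and its wire time in ns. For
     * example, at 1 Gbps (8000 ps/byte) a 1500 byte frame corresponds to
     * 1500 * 8000 / 1000 = 12000 ns, and 12000 ns maps back to 1500 bytes.
     */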
 250static int length_to_duration(struct taprio_sched *q, int len)
 251{
 252	return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
 253}
 254
 255static int duration_to_length(struct taprio_sched *q, u64 duration)
 256{
 257	return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
 258}
 259
 260/* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
 261 * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
 262 * the maximum open gate durations at the given link speed.
 263 */
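    /* For example (ignoring any size table overhead), a longest open gate of
     * 10 us for a TC on a 1 Gbps link corresponds to 1250 bytes on the wire;
     * subtracting dev->hard_header_len (14 for Ethernet) gives a dynamic
     * queueMaxSDU of 1236 bytes for that TC, unless the user asked for less.
     */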
 264static void taprio_update_queue_max_sdu(struct taprio_sched *q,
 265					struct sched_gate_list *sched,
 266					struct qdisc_size_table *stab)
 267{
 268	struct net_device *dev = qdisc_dev(q->root);
 269	int num_tc = netdev_get_num_tc(dev);
 270	u32 max_sdu_from_user;
 271	u32 max_sdu_dynamic;
 272	u32 max_sdu;
 273	int tc;
 274
 275	for (tc = 0; tc < num_tc; tc++) {
 276		max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
 277
 278		/* TC gate never closes => keep the queueMaxSDU
 279		 * selected by the user
 280		 */
 281		if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
 282			max_sdu_dynamic = U32_MAX;
 283		} else {
 284			u32 max_frm_len;
 285
 286			max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
 287			/* Compensate for L1 overhead from size table,
 288			 * but don't let the frame size go negative
 289			 */
 290			if (stab) {
 291				max_frm_len -= stab->szopts.overhead;
 292				max_frm_len = max_t(int, max_frm_len,
 293						    dev->hard_header_len + 1);
 294			}
 295			max_sdu_dynamic = max_frm_len - dev->hard_header_len;
 296			if (max_sdu_dynamic > dev->max_mtu)
 297				max_sdu_dynamic = U32_MAX;
 298		}
 299
 300		max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
 301
 302		if (max_sdu != U32_MAX) {
 303			sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
 304			sched->max_sdu[tc] = max_sdu;
 305		} else {
 306			sched->max_frm_len[tc] = U32_MAX; /* never oversized */
 307			sched->max_sdu[tc] = 0;
 308		}
 309	}
 310}
 311
 312/* Returns the entry corresponding to the next available interval. If
 313 * validate_interval is set, it only validates whether the timestamp occurs
 314 * when the gate corresponding to the skb's traffic class is open.
 315 */
 316static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
 317						  struct Qdisc *sch,
 318						  struct sched_gate_list *sched,
 319						  struct sched_gate_list *admin,
 320						  ktime_t time,
 321						  ktime_t *interval_start,
 322						  ktime_t *interval_end,
 323						  bool validate_interval)
 324{
 325	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
 326	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
 327	struct sched_entry *entry = NULL, *entry_found = NULL;
 328	struct taprio_sched *q = qdisc_priv(sch);
 329	struct net_device *dev = qdisc_dev(sch);
 330	bool entry_available = false;
 331	s32 cycle_elapsed;
 332	int tc, n;
 333
 334	tc = netdev_get_prio_tc_map(dev, skb->priority);
 335	packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
 336
 337	*interval_start = 0;
 338	*interval_end = 0;
 339
 340	if (!sched)
 341		return NULL;
 342
 343	cycle = sched->cycle_time;
 344	cycle_elapsed = get_cycle_time_elapsed(sched, time);
 345	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
 346	cycle_end = ktime_add_ns(curr_intv_end, cycle);
 347
 348	list_for_each_entry(entry, &sched->entries, list) {
 349		curr_intv_start = curr_intv_end;
 350		curr_intv_end = get_interval_end_time(sched, admin, entry,
 351						      curr_intv_start);
 352
 353		if (ktime_after(curr_intv_start, cycle_end))
 354			break;
 355
 356		if (!(entry->gate_mask & BIT(tc)) ||
 357		    packet_transmit_time > entry->interval)
 358			continue;
 359
 360		txtime = entry->next_txtime;
 361
 362		if (ktime_before(txtime, time) || validate_interval) {
 363			transmit_end_time = ktime_add_ns(time, packet_transmit_time);
 364			if ((ktime_before(curr_intv_start, time) &&
 365			     ktime_before(transmit_end_time, curr_intv_end)) ||
 366			    (ktime_after(curr_intv_start, time) && !validate_interval)) {
 367				entry_found = entry;
 368				*interval_start = curr_intv_start;
 369				*interval_end = curr_intv_end;
 370				break;
 371			} else if (!entry_available && !validate_interval) {
 372				/* Here, we are just trying to find out the
 373				 * first available interval in the next cycle.
 374				 */
 375				entry_available = true;
 376				entry_found = entry;
 377				*interval_start = ktime_add_ns(curr_intv_start, cycle);
 378				*interval_end = ktime_add_ns(curr_intv_end, cycle);
 379			}
 380		} else if (ktime_before(txtime, earliest_txtime) &&
 381			   !entry_available) {
 382			earliest_txtime = txtime;
 383			entry_found = entry;
 384			n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
 385			*interval_start = ktime_add(curr_intv_start, n * cycle);
 386			*interval_end = ktime_add(curr_intv_end, n * cycle);
 387		}
 388	}
 389
 390	return entry_found;
 391}
 392
 393static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
 394{
 395	struct taprio_sched *q = qdisc_priv(sch);
 396	struct sched_gate_list *sched, *admin;
 397	ktime_t interval_start, interval_end;
 398	struct sched_entry *entry;
 399
 400	rcu_read_lock();
 401	sched = rcu_dereference(q->oper_sched);
 402	admin = rcu_dereference(q->admin_sched);
 403
 404	entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
 405				       &interval_start, &interval_end, true);
 406	rcu_read_unlock();
 407
 408	return entry;
 409}
 410
 411static bool taprio_flags_valid(u32 flags)
 412{
 413	/* Make sure no other flag bits are set. */
 414	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
 415		      TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
 416		return false;
 417	/* txtime-assist and full offload are mutually exclusive */
 418	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
 419	    (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
 420		return false;
 421	return true;
 422}
 423
 424/* This returns the tstamp value set by TCP in terms of the set clock. */
 425static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
 426{
 427	unsigned int offset = skb_network_offset(skb);
 428	const struct ipv6hdr *ipv6h;
 429	const struct iphdr *iph;
 430	struct ipv6hdr _ipv6h;
 431
 432	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
 433	if (!ipv6h)
 434		return 0;
 435
 436	if (ipv6h->version == 4) {
 437		iph = (struct iphdr *)ipv6h;
 438		offset += iph->ihl * 4;
 439
 440		/* special-case 6in4 tunnelling, as that is a common way to get
 441		 * v6 connectivity in the home
 442		 */
 443		if (iph->protocol == IPPROTO_IPV6) {
 444			ipv6h = skb_header_pointer(skb, offset,
 445						   sizeof(_ipv6h), &_ipv6h);
 446
 447			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
 448				return 0;
 449		} else if (iph->protocol != IPPROTO_TCP) {
 450			return 0;
 451		}
 452	} else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
 453		return 0;
 454	}
 455
 456	return taprio_mono_to_any(q, skb->skb_mstamp_ns);
 457}
 458
 459/* There are a few scenarios where we will have to modify the txtime from
 460 * what is read from next_txtime in sched_entry. They are:
 461 * 1. If txtime is in the past,
 462 *    a. If the gate for the traffic class is currently open and the packet
 463 *       can be transmitted before it closes, schedule the packet right away.
 464 *    b. If the gate corresponding to the traffic class is going to open later
 465 *       in the cycle, set the txtime of the packet to the interval start.
 466 * 2. If txtime is in the future, there are packets corresponding to the
 467 *    current traffic class waiting to be transmitted. So, the following
 468 *    possibilities exist:
 469 *    a. We can transmit the packet before the window containing the txtime
 470 *       closes.
 471 *    b. The window might close before the transmission can be completed
 472 *       successfully. So, schedule the packet in the next open window.
 473 */
 474static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
 475{
 476	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
 477	struct taprio_sched *q = qdisc_priv(sch);
 478	struct sched_gate_list *sched, *admin;
 479	ktime_t minimum_time, now, txtime;
 480	int len, packet_transmit_time;
 481	struct sched_entry *entry;
 482	bool sched_changed;
 483
 484	now = taprio_get_time(q);
 485	minimum_time = ktime_add_ns(now, q->txtime_delay);
 486
 487	tcp_tstamp = get_tcp_tstamp(q, skb);
 488	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
 489
 490	rcu_read_lock();
 491	admin = rcu_dereference(q->admin_sched);
 492	sched = rcu_dereference(q->oper_sched);
 493	if (admin && ktime_after(minimum_time, admin->base_time))
 494		switch_schedules(q, &admin, &sched);
 495
 496	/* Until the schedule starts, all the queues are open */
 497	if (!sched || ktime_before(minimum_time, sched->base_time)) {
 498		txtime = minimum_time;
 499		goto done;
 500	}
 501
 502	len = qdisc_pkt_len(skb);
 503	packet_transmit_time = length_to_duration(q, len);
 504
 505	do {
 506		sched_changed = false;
 507
 508		entry = find_entry_to_transmit(skb, sch, sched, admin,
 509					       minimum_time,
 510					       &interval_start, &interval_end,
 511					       false);
 512		if (!entry) {
 513			txtime = 0;
 514			goto done;
 515		}
 516
 517		txtime = entry->next_txtime;
 518		txtime = max_t(ktime_t, txtime, minimum_time);
 519		txtime = max_t(ktime_t, txtime, interval_start);
 520
 521		if (admin && admin != sched &&
 522		    ktime_after(txtime, admin->base_time)) {
 523			sched = admin;
 524			sched_changed = true;
 525			continue;
 526		}
 527
 528		transmit_end_time = ktime_add(txtime, packet_transmit_time);
 529		minimum_time = transmit_end_time;
 530
 531	/* Update the txtime of the current entry to the next time its
 532	 * interval starts.
 533		 */
 534		if (ktime_after(transmit_end_time, interval_end))
 535			entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
 536	} while (sched_changed || ktime_after(transmit_end_time, interval_end));
 537
 538	entry->next_txtime = transmit_end_time;
 539
 540done:
 541	rcu_read_unlock();
 542	return txtime;
 543}
 544
 545/* Devices with full offload are expected to honor this in hardware */
 546static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
 547					     struct sk_buff *skb)
 548{
 549	struct taprio_sched *q = qdisc_priv(sch);
 550	struct net_device *dev = qdisc_dev(sch);
 551	struct sched_gate_list *sched;
 552	int prio = skb->priority;
 553	bool exceeds = false;
 554	u8 tc;
 555
 556	tc = netdev_get_prio_tc_map(dev, prio);
 557
 558	rcu_read_lock();
 559	sched = rcu_dereference(q->oper_sched);
 560	if (sched && skb->len > sched->max_frm_len[tc])
 561		exceeds = true;
 562	rcu_read_unlock();
 563
 564	return exceeds;
 565}
 566
 567static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
 568			      struct Qdisc *child, struct sk_buff **to_free)
 569{
 570	struct taprio_sched *q = qdisc_priv(sch);
 571
 572	/* sk_flags are only safe to use on full sockets. */
 573	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
 574		if (!is_valid_interval(skb, sch))
 575			return qdisc_drop(skb, sch, to_free);
 576	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
 577		skb->tstamp = get_packet_txtime(skb, sch);
 578		if (!skb->tstamp)
 579			return qdisc_drop(skb, sch, to_free);
 580	}
 581
 582	qdisc_qstats_backlog_inc(sch, skb);
 583	sch->q.qlen++;
 584
 585	return qdisc_enqueue(skb, child, to_free);
 586}
 587
 588static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
 589				    struct Qdisc *child,
 590				    struct sk_buff **to_free)
 591{
 592	unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
 593	netdev_features_t features = netif_skb_features(skb);
 594	struct sk_buff *segs, *nskb;
 595	int ret;
 596
 597	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
 598	if (IS_ERR_OR_NULL(segs))
 599		return qdisc_drop(skb, sch, to_free);
 600
 601	skb_list_walk_safe(segs, segs, nskb) {
 602		skb_mark_not_on_list(segs);
 603		qdisc_skb_cb(segs)->pkt_len = segs->len;
 604		slen += segs->len;
 605
 606		/* FIXME: we should be segmenting to a smaller size
 607		 * rather than dropping these
 608		 */
 609		if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
 610			ret = qdisc_drop(segs, sch, to_free);
 611		else
 612			ret = taprio_enqueue_one(segs, sch, child, to_free);
 613
 614		if (ret != NET_XMIT_SUCCESS) {
 615			if (net_xmit_drop_count(ret))
 616				qdisc_qstats_drop(sch);
 617		} else {
 618			numsegs++;
 619		}
 620	}
 621
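    	/* The caller handed us one skb of size 'len', but 'numsegs' segments
    	 * totalling 'slen' bytes were enqueued in its place; the negative
    	 * arguments below adjust the qdisc tree's qlen/backlog accounting
    	 * for the extra packets and bytes created by segmentation.
    	 */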
 622	if (numsegs > 1)
 623		qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
 624	consume_skb(skb);
 625
 626	return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
 627}
 628
 629/* Will not be called in the full offload case, since the TX queues are
 630 * attached to the Qdisc created using qdisc_create_dflt()
 631 */
 632static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 633			  struct sk_buff **to_free)
 634{
 635	struct taprio_sched *q = qdisc_priv(sch);
 636	struct Qdisc *child;
 637	int queue;
 638
 639	queue = skb_get_queue_mapping(skb);
 640
 641	child = q->qdiscs[queue];
 642	if (unlikely(!child))
 643		return qdisc_drop(skb, sch, to_free);
 644
 645	if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
 646		/* Large packets might not be transmitted when the transmission
 647		 * duration exceeds any configured interval. Therefore, segment
 648		 * the skb into smaller chunks. Drivers with full offload are
 649		 * expected to handle this in hardware.
 650		 */
 651		if (skb_is_gso(skb))
 652			return taprio_enqueue_segmented(skb, sch, child,
 653							to_free);
 654
 655		return qdisc_drop(skb, sch, to_free);
 656	}
 657
 658	return taprio_enqueue_one(skb, sch, child, to_free);
 659}
 660
 661static struct sk_buff *taprio_peek(struct Qdisc *sch)
 662{
 663	WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
 664	return NULL;
 665}
 666
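    /* The per-entry budget is expressed in bytes: the time a traffic class's
     * gate stays open divided by the transmission time per byte at the
     * current link speed.
     */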
 667static void taprio_set_budgets(struct taprio_sched *q,
 668			       struct sched_gate_list *sched,
 669			       struct sched_entry *entry)
 670{
 671	struct net_device *dev = qdisc_dev(q->root);
 672	int num_tc = netdev_get_num_tc(dev);
 673	int tc, budget;
 674
 675	for (tc = 0; tc < num_tc; tc++) {
 676		/* Traffic classes which never close have infinite budget */
 677		if (entry->gate_duration[tc] == sched->cycle_time)
 678			budget = INT_MAX;
 679		else
 680			budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
 681					   atomic64_read(&q->picos_per_byte));
 682
 683		atomic_set(&entry->budget[tc], budget);
 684	}
 685}
 686
 687/* When an skb is sent, it consumes from the budget of all traffic classes */
 688static int taprio_update_budgets(struct sched_entry *entry, size_t len,
 689				 int tc_consumed, int num_tc)
 690{
 691	int tc, budget, new_budget = 0;
 692
 693	for (tc = 0; tc < num_tc; tc++) {
 694		budget = atomic_read(&entry->budget[tc]);
 695		/* Don't consume from infinite budget */
 696		if (budget == INT_MAX) {
 697			if (tc == tc_consumed)
 698				new_budget = budget;
 699			continue;
 700		}
 701
 702		if (tc == tc_consumed)
 703			new_budget = atomic_sub_return(len, &entry->budget[tc]);
 704		else
 705			atomic_sub(len, &entry->budget[tc]);
 706	}
 707
 708	return new_budget;
 709}
 710
 711static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
 712					       struct sched_entry *entry,
 713					       u32 gate_mask)
 714{
 715	struct taprio_sched *q = qdisc_priv(sch);
 716	struct net_device *dev = qdisc_dev(sch);
 717	struct Qdisc *child = q->qdiscs[txq];
 718	int num_tc = netdev_get_num_tc(dev);
 719	struct sk_buff *skb;
 720	ktime_t guard;
 721	int prio;
 722	int len;
 723	u8 tc;
 724
 725	if (unlikely(!child))
 726		return NULL;
 727
 728	if (TXTIME_ASSIST_IS_ENABLED(q->flags))
 729		goto skip_peek_checks;
 730
 731	skb = child->ops->peek(child);
 732	if (!skb)
 733		return NULL;
 734
 735	prio = skb->priority;
 736	tc = netdev_get_prio_tc_map(dev, prio);
 737
 738	if (!(gate_mask & BIT(tc)))
 739		return NULL;
 740
 741	len = qdisc_pkt_len(skb);
 742	guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
 743
 744	/* In the case that there's no gate entry, there's no
 745	 * guard band ...
 746	 */
 747	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
 748	    !taprio_entry_allows_tx(guard, entry, tc))
 749		return NULL;
 750
 751	/* ... and no budget. */
 752	if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
 753	    taprio_update_budgets(entry, len, tc, num_tc) < 0)
 754		return NULL;
 755
 756skip_peek_checks:
 757	skb = child->ops->dequeue(child);
 758	if (unlikely(!skb))
 759		return NULL;
 760
 761	qdisc_bstats_update(sch, skb);
 762	qdisc_qstats_backlog_dec(sch, skb);
 763	sch->q.qlen--;
 764
 765	return skb;
 766}
 767
 768static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
 769{
 770	int offset = dev->tc_to_txq[tc].offset;
 771	int count = dev->tc_to_txq[tc].count;
 772
 773	(*txq)++;
 774	if (*txq == offset + count)
 775		*txq = offset;
 776}
 777
 778/* Prioritize higher traffic classes, and select among TXQs belonging to the
 779 * same TC using round robin
 780 */
 781static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
 782						  struct sched_entry *entry,
 783						  u32 gate_mask)
 784{
 785	struct taprio_sched *q = qdisc_priv(sch);
 786	struct net_device *dev = qdisc_dev(sch);
 787	int num_tc = netdev_get_num_tc(dev);
 788	struct sk_buff *skb;
 789	int tc;
 790
 791	for (tc = num_tc - 1; tc >= 0; tc--) {
 792		int first_txq = q->cur_txq[tc];
 793
 794		if (!(gate_mask & BIT(tc)))
 795			continue;
 796
 797		do {
 798			skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
 799						      entry, gate_mask);
 800
 801			taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
 802
 803			if (q->cur_txq[tc] >= dev->num_tx_queues)
 804				q->cur_txq[tc] = first_txq;
 805
 806			if (skb)
 807				return skb;
 808		} while (q->cur_txq[tc] != first_txq);
 809	}
 810
 811	return NULL;
 812}
 813
 814/* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
 815 * class other than to determine whether the gate is open or not
 816 */
 817static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
 818						   struct sched_entry *entry,
 819						   u32 gate_mask)
 820{
 821	struct net_device *dev = qdisc_dev(sch);
 822	struct sk_buff *skb;
 823	int i;
 824
 825	for (i = 0; i < dev->num_tx_queues; i++) {
 826		skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
 827		if (skb)
 828			return skb;
 829	}
 830
 831	return NULL;
 832}
 833
 834/* Will not be called in the full offload case, since the TX queues are
 835 * attached to the Qdisc created using qdisc_create_dflt()
 836 */
 837static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
 838{
 839	struct taprio_sched *q = qdisc_priv(sch);
 840	struct sk_buff *skb = NULL;
 841	struct sched_entry *entry;
 842	u32 gate_mask;
 843
 844	rcu_read_lock();
 845	entry = rcu_dereference(q->current_entry);
 846	/* If there's no entry, it means that the schedule hasn't
 847	 * started yet, so force all gates to be open. This is in
 848	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
 849	 * "AdminGateStates".
 850	 */
 851	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
 852	if (!gate_mask)
 853		goto done;
 854
 855	if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
 856	    !static_branch_likely(&taprio_have_working_mqprio)) {
 857		/* Single NIC kind which is broken */
 858		skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
 859	} else if (static_branch_likely(&taprio_have_working_mqprio) &&
 860		   !static_branch_unlikely(&taprio_have_broken_mqprio)) {
 861		/* Single NIC kind which prioritizes properly */
 862		skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
 863	} else {
 864		/* Mixed NIC kinds present in system, need dynamic testing */
 865		if (q->broken_mqprio)
 866			skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
 867		else
 868			skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
 869	}
 870
 871done:
 872	rcu_read_unlock();
 873
 874	return skb;
 875}
 876
 877static bool should_restart_cycle(const struct sched_gate_list *oper,
 878				 const struct sched_entry *entry)
 879{
 880	if (list_is_last(&entry->list, &oper->entries))
 881		return true;
 882
 883	if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
 884		return true;
 885
 886	return false;
 887}
 888
 889static bool should_change_schedules(const struct sched_gate_list *admin,
 890				    const struct sched_gate_list *oper,
 891				    ktime_t end_time)
 892{
 893	ktime_t next_base_time, extension_time;
 894
 895	if (!admin)
 896		return false;
 897
 898	next_base_time = sched_base_time(admin);
 899
 900	/* This is the simple case, the end_time would fall after
 901	 * the next schedule base_time.
 902	 */
 903	if (ktime_compare(next_base_time, end_time) <= 0)
 904		return true;
 905
 906	/* This is the cycle_time_extension case, if the end_time
 907	 * plus the amount that can be extended would fall after the
 908	 * next schedule base_time, we can extend the current schedule
 909	 * for that amount.
 910	 */
 911	extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
 912
 913	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
 914	 * how precisely the extension should be made. So after
 915	 * conformance testing, this logic may change.
 916	 */
 917	if (ktime_compare(next_base_time, extension_time) <= 0)
 918		return true;
 919
 920	return false;
 921}
 922
 923static enum hrtimer_restart advance_sched(struct hrtimer *timer)
 924{
 925	struct taprio_sched *q = container_of(timer, struct taprio_sched,
 926					      advance_timer);
 927	struct net_device *dev = qdisc_dev(q->root);
 928	struct sched_gate_list *oper, *admin;
 929	int num_tc = netdev_get_num_tc(dev);
 930	struct sched_entry *entry, *next;
 931	struct Qdisc *sch = q->root;
 932	ktime_t end_time;
 933	int tc;
 934
 935	spin_lock(&q->current_entry_lock);
 936	entry = rcu_dereference_protected(q->current_entry,
 937					  lockdep_is_held(&q->current_entry_lock));
 938	oper = rcu_dereference_protected(q->oper_sched,
 939					 lockdep_is_held(&q->current_entry_lock));
 940	admin = rcu_dereference_protected(q->admin_sched,
 941					  lockdep_is_held(&q->current_entry_lock));
 942
 943	if (!oper)
 944		switch_schedules(q, &admin, &oper);
 945
 946	/* This can happen in two cases: 1. this is the very first run
 947	 * of this function (i.e. we weren't running any schedule
 948	 * previously); 2. The previous schedule just ended. The first
 949	 * entry of each schedule is pre-calculated during the
 950	 * schedule initialization.
 951	 */
 952	if (unlikely(!entry || entry->end_time == oper->base_time)) {
 953		next = list_first_entry(&oper->entries, struct sched_entry,
 954					list);
 955		end_time = next->end_time;
 956		goto first_run;
 957	}
 958
 959	if (should_restart_cycle(oper, entry)) {
 960		next = list_first_entry(&oper->entries, struct sched_entry,
 961					list);
 962		oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
 963						    oper->cycle_time);
 964	} else {
 965		next = list_next_entry(entry, list);
 966	}
 967
 968	end_time = ktime_add_ns(entry->end_time, next->interval);
 969	end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
 970
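    	/* Pre-compute the absolute time at which each traffic class's gate
    	 * closes, counted from the start of the next entry; KTIME_MAX marks
    	 * gates that stay open for the whole cycle. The dequeue path compares
    	 * transmission end times against these values.
    	 */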
 971	for (tc = 0; tc < num_tc; tc++) {
 972		if (next->gate_duration[tc] == oper->cycle_time)
 973			next->gate_close_time[tc] = KTIME_MAX;
 974		else
 975			next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
 976								 next->gate_duration[tc]);
 977	}
 978
 979	if (should_change_schedules(admin, oper, end_time)) {
 980		/* Set things so the next time this runs, the new
 981		 * schedule runs.
 982		 */
 983		end_time = sched_base_time(admin);
 984		switch_schedules(q, &admin, &oper);
 985	}
 986
 987	next->end_time = end_time;
 988	taprio_set_budgets(q, oper, next);
 989
 990first_run:
 991	rcu_assign_pointer(q->current_entry, next);
 992	spin_unlock(&q->current_entry_lock);
 993
 994	hrtimer_set_expires(&q->advance_timer, end_time);
 995
 996	rcu_read_lock();
 997	__netif_schedule(sch);
 998	rcu_read_unlock();
 999
1000	return HRTIMER_RESTART;
1001}
1002
1003static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
1004	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
1005	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
1006	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
1007	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
1008};
1009
1010static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
1011	[TCA_TAPRIO_TC_ENTRY_INDEX]	   = { .type = NLA_U32 },
1012	[TCA_TAPRIO_TC_ENTRY_MAX_SDU]	   = { .type = NLA_U32 },
1013	[TCA_TAPRIO_TC_ENTRY_FP]	   = NLA_POLICY_RANGE(NLA_U32,
1014							      TC_FP_EXPRESS,
1015							      TC_FP_PREEMPTIBLE),
1016};
1017
1018static const struct netlink_range_validation_signed taprio_cycle_time_range = {
1019	.min = 0,
1020	.max = INT_MAX,
1021};
1022
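    /* For reference, a schedule is typically installed with an iproute2
     * command along these lines (the long options map onto the attributes
     * in the policy below):
     *
     *   tc qdisc replace dev eth0 parent root handle 100 taprio \
     *       num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
     *       queues 1@0 1@1 2@2 \
     *       base-time 1000000000 \
     *       sched-entry S 01 300000 \
     *       sched-entry S 02 300000 \
     *       sched-entry S 04 400000 \
     *       clockid CLOCK_TAI
     */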
1023static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
1024	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
1025		.len = sizeof(struct tc_mqprio_qopt)
1026	},
1027	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
1028	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
1029	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
1030	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
1031	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           =
1032		NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
1033	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
1034	[TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
1035	[TCA_TAPRIO_ATTR_TXTIME_DELAY]		     = { .type = NLA_U32 },
1036	[TCA_TAPRIO_ATTR_TC_ENTRY]		     = { .type = NLA_NESTED },
1037};
1038
1039static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
1040			    struct sched_entry *entry,
1041			    struct netlink_ext_ack *extack)
1042{
1043	int min_duration = length_to_duration(q, ETH_ZLEN);
1044	u32 interval = 0;
1045
1046	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
1047		entry->command = nla_get_u8(
1048			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
1049
1050	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
1051		entry->gate_mask = nla_get_u32(
1052			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
1053
1054	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
1055		interval = nla_get_u32(
1056			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
1057
1058	/* The interval should allow at least the minimum ethernet
1059	 * frame to go out.
1060	 */
1061	if (interval < min_duration) {
1062		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
1063		return -EINVAL;
1064	}
1065
1066	entry->interval = interval;
1067
1068	return 0;
1069}
1070
1071static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
1072			     struct sched_entry *entry, int index,
1073			     struct netlink_ext_ack *extack)
1074{
1075	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
1076	int err;
1077
1078	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
1079					  entry_policy, NULL);
1080	if (err < 0) {
1081		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
1082		return -EINVAL;
1083	}
1084
1085	entry->index = index;
1086
1087	return fill_sched_entry(q, tb, entry, extack);
1088}
1089
1090static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
1091			    struct sched_gate_list *sched,
1092			    struct netlink_ext_ack *extack)
1093{
1094	struct nlattr *n;
1095	int err, rem;
1096	int i = 0;
1097
1098	if (!list)
1099		return -EINVAL;
1100
1101	nla_for_each_nested(n, list, rem) {
1102		struct sched_entry *entry;
1103
1104		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
1105			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
1106			continue;
1107		}
1108
1109		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1110		if (!entry) {
1111			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
1112			return -ENOMEM;
1113		}
1114
1115		err = parse_sched_entry(q, n, entry, i, extack);
1116		if (err < 0) {
1117			kfree(entry);
1118			return err;
1119		}
1120
1121		list_add_tail(&entry->list, &sched->entries);
1122		i++;
1123	}
1124
1125	sched->num_entries = i;
1126
1127	return i;
1128}
1129
1130static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
1131				 struct sched_gate_list *new,
1132				 struct netlink_ext_ack *extack)
1133{
1134	int err = 0;
1135
1136	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
1137		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
1138		return -ENOTSUPP;
1139	}
1140
1141	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
1142		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
1143
1144	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
1145		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
1146
1147	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
1148		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
1149
1150	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
1151		err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
1152				       new, extack);
1153	if (err < 0)
1154		return err;
1155
1156	if (!new->cycle_time) {
1157		struct sched_entry *entry;
1158		ktime_t cycle = 0;
1159
1160		list_for_each_entry(entry, &new->entries, list)
1161			cycle = ktime_add_ns(cycle, entry->interval);
1162
1163		if (!cycle) {
1164			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
1165			return -EINVAL;
1166		}
1167
1168		if (cycle < 0 || cycle > INT_MAX) {
1169			NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
1170			return -EINVAL;
1171		}
1172
1173		new->cycle_time = cycle;
1174	}
1175
1176	taprio_calculate_gate_durations(q, new);
1177
1178	return 0;
1179}
1180
1181static int taprio_parse_mqprio_opt(struct net_device *dev,
1182				   struct tc_mqprio_qopt *qopt,
1183				   struct netlink_ext_ack *extack,
1184				   u32 taprio_flags)
1185{
1186	bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
1187
1188	if (!qopt && !dev->num_tc) {
1189		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
1190		return -EINVAL;
1191	}
1192
1193	/* If num_tc is already set, it means that the user already
1194	 * configured the mqprio part
1195	 */
1196	if (dev->num_tc)
1197		return 0;
1198
1199	/* taprio imposes that traffic classes map 1:n to tx queues */
1200	if (qopt->num_tc > dev->num_tx_queues) {
1201		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
1202		return -EINVAL;
1203	}
1204
1205	/* For some reason, in txtime-assist mode, we allow TXQ ranges for
1206	 * different TCs to overlap, and just validate the TXQ ranges.
1207	 */
1208	return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
1209				    extack);
1210}
1211
1212static int taprio_get_start_time(struct Qdisc *sch,
1213				 struct sched_gate_list *sched,
1214				 ktime_t *start)
1215{
1216	struct taprio_sched *q = qdisc_priv(sch);
1217	ktime_t now, base, cycle;
1218	s64 n;
1219
1220	base = sched_base_time(sched);
1221	now = taprio_get_time(q);
1222
1223	if (ktime_after(base, now)) {
1224		*start = base;
1225		return 0;
1226	}
1227
1228	cycle = sched->cycle_time;
1229
1230	/* The qdisc is expected to have at least one sched_entry.  Moreover,
1231	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
1232	 * something went really wrong. In that case, we should warn about this
1233	 * inconsistent state and return error.
1234	 * inconsistent state and return an error.
1235	if (WARN_ON(!cycle))
1236		return -EFAULT;
1237
1238	/* Schedule the start time for the beginning of the next
1239	 * cycle.
1240	 */
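    	/* For example, base = 0 and cycle = 1 ms with 'now' at 2.5 ms gives
    	 * n = 2 and a start time of 3 ms, the next cycle boundary.
    	 */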
1241	n = div64_s64(ktime_sub_ns(now, base), cycle);
1242	*start = ktime_add_ns(base, (n + 1) * cycle);
1243	return 0;
1244}
1245
1246static void setup_first_end_time(struct taprio_sched *q,
1247				 struct sched_gate_list *sched, ktime_t base)
1248{
1249	struct net_device *dev = qdisc_dev(q->root);
1250	int num_tc = netdev_get_num_tc(dev);
1251	struct sched_entry *first;
1252	ktime_t cycle;
1253	int tc;
1254
1255	first = list_first_entry(&sched->entries,
1256				 struct sched_entry, list);
1257
1258	cycle = sched->cycle_time;
1259
1260	/* FIXME: find a better place to do this */
1261	sched->cycle_end_time = ktime_add_ns(base, cycle);
1262
1263	first->end_time = ktime_add_ns(base, first->interval);
1264	taprio_set_budgets(q, sched, first);
1265
1266	for (tc = 0; tc < num_tc; tc++) {
1267		if (first->gate_duration[tc] == sched->cycle_time)
1268			first->gate_close_time[tc] = KTIME_MAX;
1269		else
1270			first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
1271	}
1272
1273	rcu_assign_pointer(q->current_entry, NULL);
1274}
1275
1276static void taprio_start_sched(struct Qdisc *sch,
1277			       ktime_t start, struct sched_gate_list *new)
1278{
1279	struct taprio_sched *q = qdisc_priv(sch);
1280	ktime_t expires;
1281
1282	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1283		return;
1284
1285	expires = hrtimer_get_expires(&q->advance_timer);
1286	if (expires == 0)
1287		expires = KTIME_MAX;
1288
1289	/* If the new schedule starts before the next expiration, we
1290	 * reprogram it to the earliest one, so we change the admin
1291	 * schedule to the operational one at the right time.
1292	 */
1293	start = min_t(ktime_t, start, expires);
1294
1295	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
1296}
1297
1298static void taprio_set_picos_per_byte(struct net_device *dev,
1299				      struct taprio_sched *q)
1300{
1301	struct ethtool_link_ksettings ecmd;
1302	int speed = SPEED_10;
1303	int picos_per_byte;
1304	int err;
1305
1306	err = __ethtool_get_link_ksettings(dev, &ecmd);
1307	if (err < 0)
1308		goto skip;
1309
1310	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
1311		speed = ecmd.base.speed;
1312
1313skip:
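    	/* 'speed' is in Mb/s, so 1000 (1 Gbps) yields 8000 ps/byte and 10000
    	 * (10 Gbps) yields 800 ps/byte; the SPEED_10 fallback above results
    	 * in 800000 ps/byte.
    	 */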
1314	picos_per_byte = (USEC_PER_SEC * 8) / speed;
1315
1316	atomic64_set(&q->picos_per_byte, picos_per_byte);
1317	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
1318		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
1319		   ecmd.base.speed);
1320}
1321
1322static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
1323			       void *ptr)
1324{
1325	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1326	struct sched_gate_list *oper, *admin;
1327	struct qdisc_size_table *stab;
1328	struct taprio_sched *q;
1329
1330	ASSERT_RTNL();
1331
1332	if (event != NETDEV_UP && event != NETDEV_CHANGE)
1333		return NOTIFY_DONE;
1334
1335	list_for_each_entry(q, &taprio_list, taprio_list) {
1336		if (dev != qdisc_dev(q->root))
1337			continue;
1338
1339		taprio_set_picos_per_byte(dev, q);
1340
1341		stab = rtnl_dereference(q->root->stab);
1342
1343		oper = rtnl_dereference(q->oper_sched);
1344		if (oper)
1345			taprio_update_queue_max_sdu(q, oper, stab);
1346
1347		admin = rtnl_dereference(q->admin_sched);
1348		if (admin)
1349			taprio_update_queue_max_sdu(q, admin, stab);
1350
1351		break;
1352	}
1353
1354	return NOTIFY_DONE;
1355}
1356
1357static void setup_txtime(struct taprio_sched *q,
1358			 struct sched_gate_list *sched, ktime_t base)
1359{
1360	struct sched_entry *entry;
1361	u64 interval = 0;
1362
1363	list_for_each_entry(entry, &sched->entries, list) {
1364		entry->next_txtime = ktime_add_ns(base, interval);
1365		interval += entry->interval;
1366	}
1367}
1368
1369static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
1370{
1371	struct __tc_taprio_qopt_offload *__offload;
1372
1373	__offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
1374			    GFP_KERNEL);
1375	if (!__offload)
1376		return NULL;
1377
1378	refcount_set(&__offload->users, 1);
1379
1380	return &__offload->offload;
1381}
1382
1383struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
1384						  *offload)
1385{
1386	struct __tc_taprio_qopt_offload *__offload;
1387
1388	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
1389				 offload);
1390
1391	refcount_inc(&__offload->users);
1392
1393	return offload;
1394}
1395EXPORT_SYMBOL_GPL(taprio_offload_get);
1396
1397void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
1398{
1399	struct __tc_taprio_qopt_offload *__offload;
1400
1401	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
1402				 offload);
1403
1404	if (!refcount_dec_and_test(&__offload->users))
1405		return;
1406
1407	kfree(__offload);
1408}
1409EXPORT_SYMBOL_GPL(taprio_offload_free);
1410
1411/* This function only serves to keep the pointers to the "oper" and "admin"
1412 * schedules valid in relation to their base times, so that when calling
1413 * dump() the user looks at the right schedules.
1414 * When using full offload, the admin configuration is promoted to oper at the
1415 * base_time in the PHC time domain.  But because the system time is not
1416 * necessarily in sync with that, we can't just trigger a hrtimer to call
1417 * switch_schedules at the right hardware time.
1418 * At the moment we call this by hand right away from taprio, but in the future
1419 * it will be useful to create a mechanism for drivers to notify taprio of the
1420 * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
1421 * This is left as TODO.
1422 */
1423static void taprio_offload_config_changed(struct taprio_sched *q)
1424{
1425	struct sched_gate_list *oper, *admin;
1426
1427	oper = rtnl_dereference(q->oper_sched);
1428	admin = rtnl_dereference(q->admin_sched);
1429
1430	switch_schedules(q, &admin, &oper);
1431}
1432
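    /* Example: with TC 0 mapped to TXQs 0-1 and TC 1 mapped to TXQs 2-3, a
     * tc_mask of BIT(1) translates into a queue_mask of 0xc (TXQs 2 and 3).
     */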
1433static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
1434{
1435	u32 i, queue_mask = 0;
1436
1437	for (i = 0; i < dev->num_tc; i++) {
1438		u32 offset, count;
1439
1440		if (!(tc_mask & BIT(i)))
1441			continue;
1442
1443		offset = dev->tc_to_txq[i].offset;
1444		count = dev->tc_to_txq[i].count;
1445
1446		queue_mask |= GENMASK(offset + count - 1, offset);
1447	}
1448
1449	return queue_mask;
1450}
1451
1452static void taprio_sched_to_offload(struct net_device *dev,
1453				    struct sched_gate_list *sched,
1454				    struct tc_taprio_qopt_offload *offload,
1455				    const struct tc_taprio_caps *caps)
1456{
1457	struct sched_entry *entry;
1458	int i = 0;
1459
1460	offload->base_time = sched->base_time;
1461	offload->cycle_time = sched->cycle_time;
1462	offload->cycle_time_extension = sched->cycle_time_extension;
1463
1464	list_for_each_entry(entry, &sched->entries, list) {
1465		struct tc_taprio_sched_entry *e = &offload->entries[i];
1466
1467		e->command = entry->command;
1468		e->interval = entry->interval;
1469		if (caps->gate_mask_per_txq)
1470			e->gate_mask = tc_map_to_queue_mask(dev,
1471							    entry->gate_mask);
1472		else
1473			e->gate_mask = entry->gate_mask;
1474
1475		i++;
1476	}
1477
1478	offload->num_entries = i;
1479}
1480
1481static void taprio_detect_broken_mqprio(struct taprio_sched *q)
1482{
1483	struct net_device *dev = qdisc_dev(q->root);
1484	struct tc_taprio_caps caps;
1485
1486	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
1487				 &caps, sizeof(caps));
1488
1489	q->broken_mqprio = caps.broken_mqprio;
1490	if (q->broken_mqprio)
1491		static_branch_inc(&taprio_have_broken_mqprio);
1492	else
1493		static_branch_inc(&taprio_have_working_mqprio);
1494
1495	q->detected_mqprio = true;
1496}
1497
1498static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
1499{
1500	if (!q->detected_mqprio)
1501		return;
1502
1503	if (q->broken_mqprio)
1504		static_branch_dec(&taprio_have_broken_mqprio);
1505	else
1506		static_branch_dec(&taprio_have_working_mqprio);
1507}
1508
1509static int taprio_enable_offload(struct net_device *dev,
1510				 struct taprio_sched *q,
1511				 struct sched_gate_list *sched,
1512				 struct netlink_ext_ack *extack)
1513{
1514	const struct net_device_ops *ops = dev->netdev_ops;
1515	struct tc_taprio_qopt_offload *offload;
1516	struct tc_taprio_caps caps;
1517	int tc, err = 0;
1518
1519	if (!ops->ndo_setup_tc) {
1520		NL_SET_ERR_MSG(extack,
1521			       "Device does not support taprio offload");
1522		return -EOPNOTSUPP;
1523	}
1524
1525	qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
1526				 &caps, sizeof(caps));
1527
1528	if (!caps.supports_queue_max_sdu) {
1529		for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
1530			if (q->max_sdu[tc]) {
1531				NL_SET_ERR_MSG_MOD(extack,
1532						   "Device does not handle queueMaxSDU");
1533				return -EOPNOTSUPP;
1534			}
1535		}
1536	}
1537
1538	offload = taprio_offload_alloc(sched->num_entries);
1539	if (!offload) {
1540		NL_SET_ERR_MSG(extack,
1541			       "Not enough memory for enabling offload mode");
1542		return -ENOMEM;
1543	}
1544	offload->cmd = TAPRIO_CMD_REPLACE;
1545	offload->extack = extack;
1546	mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
1547	offload->mqprio.extack = extack;
1548	taprio_sched_to_offload(dev, sched, offload, &caps);
1549	mqprio_fp_to_offload(q->fp, &offload->mqprio);
1550
1551	for (tc = 0; tc < TC_MAX_QUEUE; tc++)
1552		offload->max_sdu[tc] = q->max_sdu[tc];
1553
1554	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
1555	if (err < 0) {
1556		NL_SET_ERR_MSG_WEAK(extack,
1557				    "Device failed to setup taprio offload");
1558		goto done;
1559	}
1560
1561	q->offloaded = true;
1562
1563done:
1564	/* The offload structure may linger around via a reference taken by the
1565	 * device driver, so clear up the netlink extack pointer so that the
1566	 * driver isn't tempted to dereference data which stopped being valid
1567	 */
1568	offload->extack = NULL;
1569	offload->mqprio.extack = NULL;
1570	taprio_offload_free(offload);
1571
1572	return err;
1573}
1574
1575static int taprio_disable_offload(struct net_device *dev,
1576				  struct taprio_sched *q,
1577				  struct netlink_ext_ack *extack)
1578{
1579	const struct net_device_ops *ops = dev->netdev_ops;
1580	struct tc_taprio_qopt_offload *offload;
1581	int err;
1582
1583	if (!q->offloaded)
1584		return 0;
1585
1586	offload = taprio_offload_alloc(0);
1587	if (!offload) {
1588		NL_SET_ERR_MSG(extack,
1589			       "Not enough memory to disable offload mode");
1590		return -ENOMEM;
1591	}
1592	offload->cmd = TAPRIO_CMD_DESTROY;
1593
1594	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
1595	if (err < 0) {
1596		NL_SET_ERR_MSG(extack,
1597			       "Device failed to disable offload");
1598		goto out;
1599	}
1600
1601	q->offloaded = false;
1602
1603out:
1604	taprio_offload_free(offload);
1605
1606	return err;
1607}
1608
1609/* If full offload is enabled, the only possible clockid is the net device's
1610 * PHC. For that reason, specifying a clockid through netlink is incorrect.
1611 * For txtime-assist, it is implicitly assumed that the device's PHC is kept
1612 * in sync with the specified clockid via a user space daemon such as phc2sys.
1613 * For both software taprio and txtime-assist, the clockid is used for the
1614 * hrtimer that advances the schedule, and is hence mandatory.
1615 */
1616static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
1617				struct netlink_ext_ack *extack)
1618{
1619	struct taprio_sched *q = qdisc_priv(sch);
1620	struct net_device *dev = qdisc_dev(sch);
1621	int err = -EINVAL;
1622
1623	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
1624		const struct ethtool_ops *ops = dev->ethtool_ops;
1625		struct ethtool_ts_info info = {
1626			.cmd = ETHTOOL_GET_TS_INFO,
1627			.phc_index = -1,
1628		};
1629
1630		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
1631			NL_SET_ERR_MSG(extack,
1632				       "The 'clockid' cannot be specified for full offload");
1633			goto out;
1634		}
1635
1636		if (ops && ops->get_ts_info)
1637			err = ops->get_ts_info(dev, &info);
1638
1639		if (err || info.phc_index < 0) {
1640			NL_SET_ERR_MSG(extack,
1641				       "Device does not have a PTP clock");
1642			err = -ENOTSUPP;
1643			goto out;
1644		}
1645	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
1646		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
1647		enum tk_offsets tk_offset;
1648
1649		/* We only support static clockids and we don't allow
1650		 * the clockid to be modified after the first init.
1651		 */
1652		if (clockid < 0 ||
1653		    (q->clockid != -1 && q->clockid != clockid)) {
1654			NL_SET_ERR_MSG(extack,
1655				       "Changing the 'clockid' of a running schedule is not supported");
1656			err = -ENOTSUPP;
1657			goto out;
1658		}
1659
1660		switch (clockid) {
1661		case CLOCK_REALTIME:
1662			tk_offset = TK_OFFS_REAL;
1663			break;
1664		case CLOCK_MONOTONIC:
1665			tk_offset = TK_OFFS_MAX;
1666			break;
1667		case CLOCK_BOOTTIME:
1668			tk_offset = TK_OFFS_BOOT;
1669			break;
1670		case CLOCK_TAI:
1671			tk_offset = TK_OFFS_TAI;
1672			break;
1673		default:
1674			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
1675			err = -EINVAL;
1676			goto out;
1677		}
1678		/* This pairs with READ_ONCE() in taprio_mono_to_any */
1679		WRITE_ONCE(q->tk_offset, tk_offset);
1680
1681		q->clockid = clockid;
1682	} else {
1683		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
1684		goto out;
1685	}
1686
1687	/* Everything went ok, return success. */
1688	err = 0;
1689
1690out:
1691	return err;
1692}
1693
1694static int taprio_parse_tc_entry(struct Qdisc *sch,
1695				 struct nlattr *opt,
1696				 u32 max_sdu[TC_QOPT_MAX_QUEUE],
1697				 u32 fp[TC_QOPT_MAX_QUEUE],
1698				 unsigned long *seen_tcs,
1699				 struct netlink_ext_ack *extack)
1700{
1701	struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
1702	struct net_device *dev = qdisc_dev(sch);
1703	int err, tc;
1704	u32 val;
1705
1706	err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
1707			       taprio_tc_policy, extack);
1708	if (err < 0)
1709		return err;
1710
1711	if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) {
1712		NL_SET_ERR_MSG_MOD(extack, "TC entry index missing");
1713		return -EINVAL;
1714	}
1715
1716	tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]);
1717	if (tc >= TC_QOPT_MAX_QUEUE) {
1718		NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range");
1719		return -ERANGE;
1720	}
1721
1722	if (*seen_tcs & BIT(tc)) {
1723		NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry");
1724		return -EINVAL;
1725	}
1726
1727	*seen_tcs |= BIT(tc);
1728
1729	if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
1730		val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
1731		if (val > dev->max_mtu) {
1732			NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
1733			return -ERANGE;
1734		}
1735
1736		max_sdu[tc] = val;
1737	}
1738
1739	if (tb[TCA_TAPRIO_TC_ENTRY_FP])
1740		fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
1741
1742	return 0;
1743}
1744
1745static int taprio_parse_tc_entries(struct Qdisc *sch,
1746				   struct nlattr *opt,
1747				   struct netlink_ext_ack *extack)
1748{
1749	struct taprio_sched *q = qdisc_priv(sch);
1750	struct net_device *dev = qdisc_dev(sch);
1751	u32 max_sdu[TC_QOPT_MAX_QUEUE];
1752	bool have_preemption = false;
1753	unsigned long seen_tcs = 0;
1754	u32 fp[TC_QOPT_MAX_QUEUE];
1755	struct nlattr *n;
1756	int tc, rem;
1757	int err = 0;
1758
1759	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
1760		max_sdu[tc] = q->max_sdu[tc];
1761		fp[tc] = q->fp[tc];
1762	}
1763
1764	nla_for_each_nested(n, opt, rem) {
1765		if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
1766			continue;
1767
1768		err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
1769					    extack);
1770		if (err)
1771			return err;
1772	}
1773
1774	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
1775		q->max_sdu[tc] = max_sdu[tc];
1776		q->fp[tc] = fp[tc];
1777		if (fp[tc] != TC_FP_EXPRESS)
1778			have_preemption = true;
1779	}
1780
1781	if (have_preemption) {
1782		if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
1783			NL_SET_ERR_MSG(extack,
1784				       "Preemption only supported with full offload");
1785			return -EOPNOTSUPP;
1786		}
1787
1788		if (!ethtool_dev_mm_supported(dev)) {
1789			NL_SET_ERR_MSG(extack,
1790				       "Device does not support preemption");
1791			return -EOPNOTSUPP;
1792		}
1793	}
1794
1795	return err;
1796}
1797
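    /* Returns 0 if @mqprio matches the device's current traffic class and
     * priority-to-TC mapping, and -1 if anything differs (or if @mqprio is
     * NULL).
     */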
1798static int taprio_mqprio_cmp(const struct net_device *dev,
1799			     const struct tc_mqprio_qopt *mqprio)
1800{
1801	int i;
1802
1803	if (!mqprio || mqprio->num_tc != dev->num_tc)
1804		return -1;
1805
1806	for (i = 0; i < mqprio->num_tc; i++)
1807		if (dev->tc_to_txq[i].count != mqprio->count[i] ||
1808		    dev->tc_to_txq[i].offset != mqprio->offset[i])
1809			return -1;
1810
1811	for (i = 0; i <= TC_BITMASK; i++)
1812		if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
1813			return -1;
1814
1815	return 0;
1816}
1817
1818/* The semantics of the 'flags' argument in relation to 'change()'
1819 * requests are interpreted following two rules (which are applied in
1820 * this order): (1) an omitted 'flags' argument is interpreted as
1821 * zero; (2) the 'flags' of a "running" taprio instance cannot be
1822 * changed.
1823 */
1824static int taprio_new_flags(const struct nlattr *attr, u32 old,
1825			    struct netlink_ext_ack *extack)
1826{
1827	u32 new = 0;
1828
1829	if (attr)
1830		new = nla_get_u32(attr);
1831
1832	if (old != TAPRIO_FLAGS_INVALID && old != new) {
1833		NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
1834		return -EOPNOTSUPP;
1835	}
1836
1837	if (!taprio_flags_valid(new)) {
1838		NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
1839		return -EINVAL;
1840	}
1841
1842	return new;
1843}
1844
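/* Core configuration path, used both for the initial setup from
 * taprio_init() and for later changes. In broad strokes: parse the
 * netlink request, build a new sched_gate_list, enable or disable
 * hardware offload as requested, and then, under qdisc_lock(), publish
 * the new schedule either as the operational schedule (txtime-assist
 * mode with no operational schedule yet) or as the admin schedule,
 * replacing any pending admin schedule via RCU.
 */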
1845static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
1846			 struct netlink_ext_ack *extack)
1847{
1848	struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
1849	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
1850	struct sched_gate_list *oper, *admin, *new_admin;
1851	struct taprio_sched *q = qdisc_priv(sch);
1852	struct net_device *dev = qdisc_dev(sch);
1853	struct tc_mqprio_qopt *mqprio = NULL;
1854	unsigned long flags;
1855	ktime_t start;
1856	int i, err;
1857
1858	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
1859					  taprio_policy, extack);
1860	if (err < 0)
1861		return err;
1862
1863	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
1864		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
1865
1866	err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
1867			       q->flags, extack);
1868	if (err < 0)
1869		return err;
1870
1871	q->flags = err;
1872
1873	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
1874	if (err < 0)
1875		return err;
1876
1877	err = taprio_parse_tc_entries(sch, opt, extack);
1878	if (err)
1879		return err;
1880
1881	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
1882	if (!new_admin) {
1883		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
1884		return -ENOMEM;
1885	}
1886	INIT_LIST_HEAD(&new_admin->entries);
1887
1888	oper = rtnl_dereference(q->oper_sched);
1889	admin = rtnl_dereference(q->admin_sched);
1890
1891	/* An unchanged mqprio mapping is treated as if it were absent */
1892	if (!taprio_mqprio_cmp(dev, mqprio))
1893		mqprio = NULL;
1894
1895	if (mqprio && (oper || admin)) {
1896		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
1897		err = -EOPNOTSUPP;
1898		goto free_sched;
1899	}
1900
1901	if (mqprio) {
1902		err = netdev_set_num_tc(dev, mqprio->num_tc);
1903		if (err)
1904			goto free_sched;
1905		for (i = 0; i < mqprio->num_tc; i++) {
1906			netdev_set_tc_queue(dev, i,
1907					    mqprio->count[i],
1908					    mqprio->offset[i]);
1909			q->cur_txq[i] = mqprio->offset[i];
1910		}
1911
1912		/* Always use supplied priority mappings */
1913		for (i = 0; i <= TC_BITMASK; i++)
1914			netdev_set_prio_tc_map(dev, i,
1915					       mqprio->prio_tc_map[i]);
1916	}
1917
1918	err = parse_taprio_schedule(q, tb, new_admin, extack);
1919	if (err < 0)
1920		goto free_sched;
1921
1922	if (new_admin->num_entries == 0) {
1923		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
1924		err = -EINVAL;
1925		goto free_sched;
1926	}
1927
1928	err = taprio_parse_clockid(sch, tb, extack);
1929	if (err < 0)
1930		goto free_sched;
1931
1932	taprio_set_picos_per_byte(dev, q);
1933	taprio_update_queue_max_sdu(q, new_admin, stab);
1934
1935	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1936		err = taprio_enable_offload(dev, q, new_admin, extack);
1937	else
1938		err = taprio_disable_offload(dev, q, extack);
1939	if (err)
1940		goto free_sched;
1941
1942	/* Protects against enqueue()/dequeue() */
1943	spin_lock_bh(qdisc_lock(sch));
1944
1945	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
1946		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
1947			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
1948			err = -EINVAL;
1949			goto unlock;
1950		}
1951
1952		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
1953	}
1954
1955	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
1956	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
1957	    !hrtimer_active(&q->advance_timer)) {
1958		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
1959		q->advance_timer.function = advance_sched;
1960	}
1961
1962	err = taprio_get_start_time(sch, new_admin, &start);
1963	if (err < 0) {
1964		NL_SET_ERR_MSG(extack, "Internal error: failed to get start time");
1965		goto unlock;
1966	}
1967
1968	setup_txtime(q, new_admin, start);
1969
1970	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
1971		if (!oper) {
1972			rcu_assign_pointer(q->oper_sched, new_admin);
1973			err = 0;
1974			new_admin = NULL;
1975			goto unlock;
1976		}
1977
1978		rcu_assign_pointer(q->admin_sched, new_admin);
1979		if (admin)
1980			call_rcu(&admin->rcu, taprio_free_sched_cb);
1981	} else {
1982		setup_first_end_time(q, new_admin, start);
1983
1984		/* Protects against advance_sched() */
1985		spin_lock_irqsave(&q->current_entry_lock, flags);
1986
1987		taprio_start_sched(sch, start, new_admin);
1988
1989		rcu_assign_pointer(q->admin_sched, new_admin);
1990		if (admin)
1991			call_rcu(&admin->rcu, taprio_free_sched_cb);
1992
1993		spin_unlock_irqrestore(&q->current_entry_lock, flags);
1994
1995		if (FULL_OFFLOAD_IS_ENABLED(q->flags))
1996			taprio_offload_config_changed(q);
1997	}
1998
1999	new_admin = NULL;
2000	err = 0;
2001
2002	if (!stab)
2003		NL_SET_ERR_MSG_MOD(extack,
2004				   "Size table not specified, frame length estimations may be inaccurate");
2005
2006unlock:
2007	spin_unlock_bh(qdisc_lock(sch));
2008
2009free_sched:
2010	if (new_admin)
2011		call_rcu(&new_admin->rcu, taprio_free_sched_cb);
2012
2013	return err;
2014}
2015
2016static void taprio_reset(struct Qdisc *sch)
2017{
2018	struct taprio_sched *q = qdisc_priv(sch);
2019	struct net_device *dev = qdisc_dev(sch);
2020	int i;
2021
2022	hrtimer_cancel(&q->advance_timer);
2023
2024	if (q->qdiscs) {
2025		for (i = 0; i < dev->num_tx_queues; i++)
2026			if (q->qdiscs[i])
2027				qdisc_reset(q->qdiscs[i]);
2028	}
2029}
2030
2031static void taprio_destroy(struct Qdisc *sch)
2032{
2033	struct taprio_sched *q = qdisc_priv(sch);
2034	struct net_device *dev = qdisc_dev(sch);
2035	struct sched_gate_list *oper, *admin;
2036	unsigned int i;
2037
2038	list_del(&q->taprio_list);
2039
2040	/* Note that taprio_reset() might not be called if an error
2041	 * happens in qdisc_create(), after taprio_init() has been called.
2042	 */
2043	hrtimer_cancel(&q->advance_timer);
2044	qdisc_synchronize(sch);
2045
2046	taprio_disable_offload(dev, q, NULL);
2047
2048	if (q->qdiscs) {
2049		for (i = 0; i < dev->num_tx_queues; i++)
2050			qdisc_put(q->qdiscs[i]);
2051
2052		kfree(q->qdiscs);
2053	}
2054	q->qdiscs = NULL;
2055
2056	netdev_reset_tc(dev);
2057
2058	oper = rtnl_dereference(q->oper_sched);
2059	admin = rtnl_dereference(q->admin_sched);
2060
2061	if (oper)
2062		call_rcu(&oper->rcu, taprio_free_sched_cb);
2063
2064	if (admin)
2065		call_rcu(&admin->rcu, taprio_free_sched_cb);
2066
2067	taprio_cleanup_broken_mqprio(q);
2068}
2069
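/* Qdisc init: set up the schedule advance timer and per-device state,
 * allocate one child qdisc (a default pfifo) per netdev TX queue and
 * defer all schedule handling to taprio_change(). taprio can only be
 * attached as the root qdisc of a multiqueue device.
 */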
2070static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
2071		       struct netlink_ext_ack *extack)
2072{
2073	struct taprio_sched *q = qdisc_priv(sch);
2074	struct net_device *dev = qdisc_dev(sch);
2075	int i, tc;
2076
2077	spin_lock_init(&q->current_entry_lock);
2078
2079	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
2080	q->advance_timer.function = advance_sched;
2081
2082	q->root = sch;
2083
2084	/* We only support static clockids. Use an invalid value as default
2085	 * and get the valid one on taprio_change().
2086	 */
2087	q->clockid = -1;
2088	q->flags = TAPRIO_FLAGS_INVALID;
2089
2090	list_add(&q->taprio_list, &taprio_list);
2091
2092	if (sch->parent != TC_H_ROOT) {
2093		NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc");
2094		return -EOPNOTSUPP;
2095	}
2096
2097	if (!netif_is_multiqueue(dev)) {
2098		NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required");
2099		return -EOPNOTSUPP;
2100	}
2101
2102	q->qdiscs = kcalloc(dev->num_tx_queues, sizeof(q->qdiscs[0]),
2103			    GFP_KERNEL);
2104	if (!q->qdiscs)
2105		return -ENOMEM;
2106
2107	if (!opt)
2108		return -EINVAL;
2109
2110	for (i = 0; i < dev->num_tx_queues; i++) {
2111		struct netdev_queue *dev_queue;
2112		struct Qdisc *qdisc;
2113
2114		dev_queue = netdev_get_tx_queue(dev, i);
2115		qdisc = qdisc_create_dflt(dev_queue,
2116					  &pfifo_qdisc_ops,
2117					  TC_H_MAKE(TC_H_MAJ(sch->handle),
2118						    TC_H_MIN(i + 1)),
2119					  extack);
2120		if (!qdisc)
2121			return -ENOMEM;
2122
2123		if (i < dev->real_num_tx_queues)
2124			qdisc_hash_add(qdisc, false);
2125
2126		q->qdiscs[i] = qdisc;
2127	}
2128
2129	for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
2130		q->fp[tc] = TC_FP_EXPRESS;
2131
2132	taprio_detect_broken_mqprio(q);
2133
2134	return taprio_change(sch, opt, extack);
2135}
2136
2137static void taprio_attach(struct Qdisc *sch)
2138{
2139	struct taprio_sched *q = qdisc_priv(sch);
2140	struct net_device *dev = qdisc_dev(sch);
2141	unsigned int ntx;
2142
2143	/* Attach underlying qdisc */
2144	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
2145		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
2146		struct Qdisc *old, *dev_queue_qdisc;
2147
2148		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
2149			struct Qdisc *qdisc = q->qdiscs[ntx];
2150
2151			/* In offload mode, the root taprio qdisc is bypassed
2152			 * and the netdev TX queues see the children directly
2153			 */
2154			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
2155			dev_queue_qdisc = qdisc;
2156		} else {
2157			/* In software mode, attach the root taprio qdisc
2158			 * to all netdev TX queues, so that dev_qdisc_enqueue()
2159			 * goes through taprio_enqueue().
2160			 */
2161			dev_queue_qdisc = sch;
2162		}
2163		old = dev_graft_qdisc(dev_queue, dev_queue_qdisc);
2164		/* The qdisc's refcount needs to be elevated once
2165		 * for each netdev TX queue it is grafted onto
2166		 */
2167		qdisc_refcount_inc(dev_queue_qdisc);
2168		if (old)
2169			qdisc_put(old);
2170	}
2171}
2172
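/* Class handling: taprio exposes one class per netdev TX queue. Class
 * minor number N corresponds to TX queue N - 1; a minor of zero, or
 * one larger than the number of TX queues, is rejected.
 */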
2173static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
2174					     unsigned long cl)
2175{
2176	struct net_device *dev = qdisc_dev(sch);
2177	unsigned long ntx = cl - 1;
2178
2179	if (ntx >= dev->num_tx_queues)
2180		return NULL;
2181
2182	return netdev_get_tx_queue(dev, ntx);
2183}
2184
2185static int taprio_graft(struct Qdisc *sch, unsigned long cl,
2186			struct Qdisc *new, struct Qdisc **old,
2187			struct netlink_ext_ack *extack)
2188{
2189	struct taprio_sched *q = qdisc_priv(sch);
2190	struct net_device *dev = qdisc_dev(sch);
2191	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
2192
2193	if (!dev_queue)
2194		return -EINVAL;
2195
2196	if (dev->flags & IFF_UP)
2197		dev_deactivate(dev);
2198
2199	/* In offload mode, the child Qdisc is directly attached to the netdev
2200	 * TX queue, and thus, we need to keep its refcount elevated in order
2201	 * to counteract qdisc_graft()'s call to qdisc_put() once per TX queue.
2202	 * In both the software and offload cases, also save the reference to
2203	 * the new qdisc in the private array, so that we keep an up-to-date
2204	 * view of our children.
2205	 */
2206	*old = q->qdiscs[cl - 1];
2207	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
2208		WARN_ON_ONCE(dev_graft_qdisc(dev_queue, new) != *old);
2209		if (new)
2210			qdisc_refcount_inc(new);
2211		if (*old)
2212			qdisc_put(*old);
2213	}
2214
2215	q->qdiscs[cl - 1] = new;
2216	if (new)
2217		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
2218
2219	if (dev->flags & IFF_UP)
2220		dev_activate(dev);
2221
2222	return 0;
2223}
2224
2225static int dump_entry(struct sk_buff *msg,
2226		      const struct sched_entry *entry)
2227{
2228	struct nlattr *item;
2229
2230	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
2231	if (!item)
2232		return -ENOSPC;
2233
2234	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
2235		goto nla_put_failure;
2236
2237	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
2238		goto nla_put_failure;
2239
2240	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
2241			entry->gate_mask))
2242		goto nla_put_failure;
2243
2244	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
2245			entry->interval))
2246		goto nla_put_failure;
2247
2248	return nla_nest_end(msg, item);
2249
2250nla_put_failure:
2251	nla_nest_cancel(msg, item);
2252	return -1;
2253}
2254
2255static int dump_schedule(struct sk_buff *msg,
2256			 const struct sched_gate_list *root)
2257{
2258	struct nlattr *entry_list;
2259	struct sched_entry *entry;
2260
2261	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
2262			root->base_time, TCA_TAPRIO_PAD))
2263		return -1;
2264
2265	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
2266			root->cycle_time, TCA_TAPRIO_PAD))
2267		return -1;
2268
2269	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
2270			root->cycle_time_extension, TCA_TAPRIO_PAD))
2271		return -1;
2272
2273	entry_list = nla_nest_start_noflag(msg,
2274					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
2275	if (!entry_list)
2276		goto error_nest;
2277
2278	list_for_each_entry(entry, &root->entries, list) {
2279		if (dump_entry(msg, entry) < 0)
2280			goto error_nest;
2281	}
2282
2283	nla_nest_end(msg, entry_list);
2284	return 0;
2285
2286error_nest:
2287	nla_nest_cancel(msg, entry_list);
2288	return -1;
2289}
2290
2291static int taprio_dump_tc_entries(struct sk_buff *skb,
2292				  struct taprio_sched *q,
2293				  struct sched_gate_list *sched)
2294{
2295	struct nlattr *n;
2296	int tc;
2297
2298	for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
2299		n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY);
2300		if (!n)
2301			return -EMSGSIZE;
2302
2303		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc))
2304			goto nla_put_failure;
2305
2306		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
2307				sched->max_sdu[tc]))
2308			goto nla_put_failure;
2309
2310		if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
2311			goto nla_put_failure;
2312
2313		nla_nest_end(skb, n);
2314	}
2315
2316	return 0;
2317
2318nla_put_failure:
2319	nla_nest_cancel(skb, n);
2320	return -EMSGSIZE;
2321}
2322
2323static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
2324{
2325	if (val == TAPRIO_STAT_NOT_SET)
2326		return 0;
2327	if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
2328		return -EMSGSIZE;
2329	return 0;
2330}
2331
2332static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
2333			      struct tc_taprio_qopt_offload *offload,
2334			      struct tc_taprio_qopt_stats *stats)
2335{
2336	struct net_device *dev = qdisc_dev(sch);
2337	const struct net_device_ops *ops;
2338	struct sk_buff *skb = d->skb;
2339	struct nlattr *xstats;
2340	int err;
2341
2342	ops = qdisc_dev(sch)->netdev_ops;
2343
2344	/* FIXME I could use qdisc_offload_dump_helper(), but that messes
2345	 * with sch->flags depending on whether the device reports taprio
2346	 * stats, and I'm not sure whether that's a good idea, considering
2347	 * that stats are optional to the offload itself
2348	 */
2349	if (!ops->ndo_setup_tc)
2350		return 0;
2351
2352	memset(stats, 0xff, sizeof(*stats));
2353
2354	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
2355	if (err == -EOPNOTSUPP)
2356		return 0;
2357	if (err)
2358		return err;
2359
2360	xstats = nla_nest_start(skb, TCA_STATS_APP);
2361	if (!xstats)
2362		goto err;
2363
2364	if (taprio_put_stat(skb, stats->window_drops,
2365			    TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
2366	    taprio_put_stat(skb, stats->tx_overruns,
2367			    TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
2368		goto err_cancel;
2369
2370	nla_nest_end(skb, xstats);
2371
2372	return 0;
2373
2374err_cancel:
2375	nla_nest_cancel(skb, xstats);
2376err:
2377	return -EMSGSIZE;
2378}
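/* Illustrative sketch, not part of this file: roughly how a driver's
 * ndo_setup_tc() could service the stats queries that
 * taprio_dump_xstats() issues above. The "example_*" names are
 * hypothetical; only the tc_taprio_qopt_offload usage mirrors the real
 * interface. Counters the hardware does not track are simply left at
 * TAPRIO_STAT_NOT_SET (the value taprio pre-fills) and are then
 * skipped by taprio_put_stat().
 *
 * static int example_setup_tc(struct net_device *dev,
 *			       enum tc_setup_type type, void *type_data)
 * {
 *	struct tc_taprio_qopt_offload *offload = type_data;
 *
 *	if (type != TC_SETUP_QDISC_TAPRIO)
 *		return -EOPNOTSUPP;
 *
 *	switch (offload->cmd) {
 *	case TAPRIO_CMD_STATS:
 *		offload->stats.window_drops = example_read_window_drops(dev);
 *		return 0;
 *	case TAPRIO_CMD_QUEUE_STATS:
 *		offload->queue_stats.stats.window_drops =
 *			example_read_queue_window_drops(dev, offload->queue_stats.queue);
 *		return 0;
 *	default:
 *		return -EOPNOTSUPP;
 *	}
 * }
 */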
2379
2380static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
2381{
2382	struct tc_taprio_qopt_offload offload = {
2383		.cmd = TAPRIO_CMD_STATS,
2384	};
2385
2386	return taprio_dump_xstats(sch, d, &offload, &offload.stats);
2387}
2388
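/* Dump the configuration back to user space: the priority map, the
 * clockid (unless running in full offload mode), the flags and
 * txtime-delay when set, and, when an operational schedule exists, its
 * per-TC limits and gate control entries. Any admin schedule that has
 * not taken effect yet is nested under TCA_TAPRIO_ATTR_ADMIN_SCHED.
 */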
2389static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
2390{
2391	struct taprio_sched *q = qdisc_priv(sch);
2392	struct net_device *dev = qdisc_dev(sch);
2393	struct sched_gate_list *oper, *admin;
2394	struct tc_mqprio_qopt opt = { 0 };
2395	struct nlattr *nest, *sched_nest;
2396
2397	oper = rtnl_dereference(q->oper_sched);
2398	admin = rtnl_dereference(q->admin_sched);
2399
2400	mqprio_qopt_reconstruct(dev, &opt);
2401
2402	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
2403	if (!nest)
2404		goto start_error;
2405
2406	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
2407		goto options_error;
2408
2409	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
2410	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
2411		goto options_error;
2412
2413	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
2414		goto options_error;
2415
2416	if (q->txtime_delay &&
2417	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
2418		goto options_error;
2419
2420	if (oper && taprio_dump_tc_entries(skb, q, oper))
2421		goto options_error;
2422
2423	if (oper && dump_schedule(skb, oper))
2424		goto options_error;
2425
2426	if (!admin)
2427		goto done;
2428
2429	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
2430	if (!sched_nest)
2431		goto options_error;
2432
2433	if (dump_schedule(skb, admin))
2434		goto admin_error;
2435
2436	nla_nest_end(skb, sched_nest);
2437
2438done:
2439	return nla_nest_end(skb, nest);
2440
2441admin_error:
2442	nla_nest_cancel(skb, sched_nest);
2443
2444options_error:
2445	nla_nest_cancel(skb, nest);
2446
2447start_error:
2448	return -ENOSPC;
2449}
2450
2451static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
2452{
2453	struct taprio_sched *q = qdisc_priv(sch);
2454	struct net_device *dev = qdisc_dev(sch);
2455	unsigned int ntx = cl - 1;
2456
2457	if (ntx >= dev->num_tx_queues)
2458		return NULL;
2459
2460	return q->qdiscs[ntx];
2461}
2462
2463static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
2464{
2465	unsigned int ntx = TC_H_MIN(classid);
2466
2467	if (!taprio_queue_get(sch, ntx))
2468		return 0;
2469	return ntx;
2470}
2471
2472static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
2473			     struct sk_buff *skb, struct tcmsg *tcm)
2474{
2475	struct Qdisc *child = taprio_leaf(sch, cl);
2476
2477	tcm->tcm_parent = TC_H_ROOT;
2478	tcm->tcm_handle |= TC_H_MIN(cl);
2479	tcm->tcm_info = child->handle;
2480
2481	return 0;
2482}
2483
2484static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
2485				   struct gnet_dump *d)
2486	__releases(d->lock)
2487	__acquires(d->lock)
2488{
2489	struct Qdisc *child = taprio_leaf(sch, cl);
2490	struct tc_taprio_qopt_offload offload = {
2491		.cmd = TAPRIO_CMD_QUEUE_STATS,
2492		.queue_stats = {
2493			.queue = cl - 1,
2494		},
2495	};
2496
2497	if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
2498	    qdisc_qstats_copy(d, child) < 0)
2499		return -1;
2500
2501	return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats);
2502}
2503
2504static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2505{
2506	struct net_device *dev = qdisc_dev(sch);
2507	unsigned long ntx;
2508
2509	if (arg->stop)
2510		return;
2511
2512	arg->count = arg->skip;
2513	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
2514		if (!tc_qdisc_stats_dump(sch, ntx + 1, arg))
2515			break;
2516	}
2517}
2518
2519static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
2520						struct tcmsg *tcm)
2521{
2522	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
2523}
2524
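/* Class operations: since every class is just a netdev TX queue, graft
 * replaces the per-queue child qdisc, leaf/find translate class ids to
 * queue indices, and the per-class statistics can additionally pull
 * TAPRIO_CMD_QUEUE_STATS counters from an offloading driver.
 */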
2525static const struct Qdisc_class_ops taprio_class_ops = {
2526	.graft		= taprio_graft,
2527	.leaf		= taprio_leaf,
2528	.find		= taprio_find,
2529	.walk		= taprio_walk,
2530	.dump		= taprio_dump_class,
2531	.dump_stats	= taprio_dump_class_stats,
2532	.select_queue	= taprio_select_queue,
2533};
2534
2535static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
2536	.cl_ops		= &taprio_class_ops,
2537	.id		= "taprio",
2538	.priv_size	= sizeof(struct taprio_sched),
2539	.init		= taprio_init,
2540	.change		= taprio_change,
2541	.destroy	= taprio_destroy,
2542	.reset		= taprio_reset,
2543	.attach		= taprio_attach,
2544	.peek		= taprio_peek,
2545	.dequeue	= taprio_dequeue,
2546	.enqueue	= taprio_enqueue,
2547	.dump		= taprio_dump,
2548	.dump_stats	= taprio_dump_stats,
2549	.owner		= THIS_MODULE,
2550};
2551
2552static struct notifier_block taprio_device_notifier = {
2553	.notifier_call = taprio_dev_notifier,
2554};
2555
2556static int __init taprio_module_init(void)
2557{
2558	int err = register_netdevice_notifier(&taprio_device_notifier);
2559
2560	if (err)
2561		return err;
2562
2563	return register_qdisc(&taprio_qdisc_ops);
2564}
2565
2566static void __exit taprio_module_exit(void)
2567{
2568	unregister_qdisc(&taprio_qdisc_ops);
2569	unregister_netdevice_notifier(&taprio_device_notifier);
2570}
2571
2572module_init(taprio_module_init);
2573module_exit(taprio_module_exit);
2574MODULE_LICENSE("GPL");
2575MODULE_DESCRIPTION("Time Aware Priority qdisc");