sja1105_tas.c - drivers/net/dsa/sja1105/sja1105_tas.c - Linux diff v6.13.7

  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
  3 */
  4#include "sja1105.h"
  5
/* Clock source selectors for the Time-Aware Shaper. In the code visible here
 * only SJA1105_TAS_CLKSRC_PTP is used: it is written to the .clksrc field of
 * the Schedule Entry Points Parameters entry.
 */
   6#define SJA1105_TAS_CLKSRC_DISABLED	0
   7#define SJA1105_TAS_CLKSRC_STANDALONE	1
   8#define SJA1105_TAS_CLKSRC_AS6802	2
   9#define SJA1105_TAS_CLKSRC_PTP		3
/* Mask with bits SJA1105_NUM_TC - 1 down to 0 set. It bounds the inverted
 * taprio gate_mask that is stored in a schedule entry's .resmedia field.
 */
  10#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)
  11
/* Get the struct sja1105_tas_data that embeds work item @d as .tas_work */
  12#define work_to_sja1105_tas(d) \
  13	container_of((d), struct sja1105_tas_data, tas_work)
/* Get the struct sja1105_private that embeds @d as .tas_data */
  14#define tas_to_sja1105(d) \
  15	container_of((d), struct sja1105_private, tas_data)
 16
 17static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
 18{
 19	struct sja1105_tas_data *tas_data = &priv->tas_data;
 20	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
 21	struct dsa_switch *ds = priv->ds;
 22	s64 earliest_base_time = S64_MAX;
 23	s64 latest_base_time = 0;
 24	s64 its_cycle_time = 0;
 25	s64 max_cycle_time = 0;
 26	int port;
 27
 28	tas_data->enabled = false;
 29
 30	for (port = 0; port < ds->num_ports; port++) {
 31		const struct tc_taprio_qopt_offload *offload;
 32
 33		offload = tas_data->offload[port];
 34		if (!offload)
 35			continue;
 36
 37		tas_data->enabled = true;
 38
 39		if (max_cycle_time < offload->cycle_time)
 40			max_cycle_time = offload->cycle_time;
 41		if (latest_base_time < offload->base_time)
 42			latest_base_time = offload->base_time;
 43		if (earliest_base_time > offload->base_time) {
 44			earliest_base_time = offload->base_time;
 45			its_cycle_time = offload->cycle_time;
 46		}
 47	}
 48
 49	if (!list_empty(&gating_cfg->entries)) {
 50		tas_data->enabled = true;
 51
 52		if (max_cycle_time < gating_cfg->cycle_time)
 53			max_cycle_time = gating_cfg->cycle_time;
 54		if (latest_base_time < gating_cfg->base_time)
 55			latest_base_time = gating_cfg->base_time;
 56		if (earliest_base_time > gating_cfg->base_time) {
 57			earliest_base_time = gating_cfg->base_time;
 58			its_cycle_time = gating_cfg->cycle_time;
 59		}
 60	}
 61
 62	if (!tas_data->enabled)
 63		return 0;
 64
 65	/* Roll the earliest base time over until it is in a comparable
 66	 * time base with the latest, then compare their deltas.
 67	 * We want to enforce that all ports' base times are within
 68	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
 69	 */
 70	earliest_base_time = future_base_time(earliest_base_time,
 71					      its_cycle_time,
 72					      latest_base_time);
 73	while (earliest_base_time > latest_base_time)
 74		earliest_base_time -= its_cycle_time;
 75	if (latest_base_time - earliest_base_time >
 76	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
 77		dev_err(ds->dev,
 78			"Base times too far apart: min %llu max %llu\n",
 79			earliest_base_time, latest_base_time);
 80		return -ERANGE;
 81	}
 82
 83	tas_data->earliest_base_time = earliest_base_time;
 84	tas_data->max_cycle_time = max_cycle_time;
 85
 86	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
 87	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
 88	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
 89
 90	return 0;
 91}
 92
 93/* Lo and behold: the egress scheduler from hell.
 94 *
  95 * At the hardware level, the Time-Aware Shaper holds a global linear array of
 96 * all schedule entries for all ports. These are the Gate Control List (GCL)
 97 * entries, let's call them "timeslots" for short. This linear array of
 98 * timeslots is held in BLK_IDX_SCHEDULE.
 99 *
100 * Then there are a maximum of 8 "execution threads" inside the switch, which
101 * iterate cyclically through the "schedule". Each "cycle" has an entry point
102 * and an exit point, both being timeslot indices in the schedule table. The
103 * hardware calls each cycle a "subschedule".
104 *
105 * Subschedule (cycle) i starts when
106 *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107 *
108 * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109 *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110 *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111 *
112 * For each schedule entry (timeslot) k, the engine executes the gate control
113 * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114 *
115 *         +---------+
116 *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117 *         +---------+
118 *              |
119 *              +-----------------+
120 *                                | .actsubsch
121 *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122 *                 +-------+-------+
123 *                 |cycle 0|cycle 1|
124 *                 +-------+-------+
125 *                   |  |      |  |
126 *  +----------------+  |      |  +-------------------------------------+
127 *  |   .subschindx     |      |             .subschindx                |
128 *  |                   |      +---------------+                        |
129 *  |          .address |        .address      |                        |
130 *  |                   |                      |                        |
131 *  |                   |                      |                        |
132 *  |  BLK_IDX_SCHEDULE v                      v                        |
133 *  |              +-------+-------+-------+-------+-------+------+     |
134 *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135 *  |              +-------+-------+-------+-------+-------+------+     |
136 *  |                                  ^                    ^  ^  ^     |
137 *  |                                  |                    |  |  |     |
138 *  |        +-------------------------+                    |  |  |     |
139 *  |        |              +-------------------------------+  |  |     |
140 *  |        |              |              +-------------------+  |     |
141 *  |        |              |              |                      |     |
142 *  | +---------------------------------------------------------------+ |
143 *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144 *  | +---------------------------------------------------------------+ |
145 *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146 *  |        |              |                                           |
147 *  +--------+              +-------------------------------------------+
148 *
149 *  In the above picture there are two subschedules (cycles):
150 *
151 *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152 *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153 *
154 *  All other possible execution threads must be marked as unused by making
155 *  their "subschedule end index" (subscheind) equal to the last valid
156 *  subschedule's end index (in this case 5).
157 */
158int sja1105_init_scheduling(struct sja1105_private *priv)
159{
160	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161	struct sja1105_schedule_entry_points_params_entry
162					*schedule_entry_points_params;
163	struct sja1105_schedule_params_entry *schedule_params;
164	struct sja1105_tas_data *tas_data = &priv->tas_data;
165	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
166	struct sja1105_schedule_entry *schedule;
167	struct dsa_switch *ds = priv->ds;
168	struct sja1105_table *table;
169	int schedule_start_idx;
170	s64 entry_point_delta;
171	int schedule_end_idx;
172	int num_entries = 0;
173	int num_cycles = 0;
174	int cycle = 0;
175	int i, k = 0;
176	int port, rc;
177
178	rc = sja1105_tas_set_runtime_params(priv);
179	if (rc < 0)
180		return rc;
181
182	/* Discard previous Schedule Table */
183	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
184	if (table->entry_count) {
185		kfree(table->entries);
186		table->entry_count = 0;
187	}
188
189	/* Discard previous Schedule Entry Points Parameters Table */
190	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
191	if (table->entry_count) {
192		kfree(table->entries);
193		table->entry_count = 0;
194	}
195
196	/* Discard previous Schedule Parameters Table */
197	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
198	if (table->entry_count) {
199		kfree(table->entries);
200		table->entry_count = 0;
201	}
202
203	/* Discard previous Schedule Entry Points Table */
204	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
205	if (table->entry_count) {
206		kfree(table->entries);
207		table->entry_count = 0;
208	}
209
210	/* Figure out the dimensioning of the problem */
211	for (port = 0; port < ds->num_ports; port++) {
212		if (tas_data->offload[port]) {
213			num_entries += tas_data->offload[port]->num_entries;
214			num_cycles++;
215		}
216	}
217
218	if (!list_empty(&gating_cfg->entries)) {
219		num_entries += gating_cfg->num_entries;
220		num_cycles++;
221	}
222
223	/* Nothing to do */
224	if (!num_cycles)
225		return 0;
226
227	/* Pre-allocate space in the static config tables */
228
229	/* Schedule Table */
230	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
231	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
232				 GFP_KERNEL);
233	if (!table->entries)
234		return -ENOMEM;
235	table->entry_count = num_entries;
236	schedule = table->entries;
237
238	/* Schedule Points Parameters Table */
239	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
240	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
241				 table->ops->unpacked_entry_size, GFP_KERNEL);
242	if (!table->entries)
243		/* Previously allocated memory will be freed automatically in
244		 * sja1105_static_config_free. This is true for all early
245		 * returns below.
246		 */
247		return -ENOMEM;
248	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
249	schedule_entry_points_params = table->entries;
250
251	/* Schedule Parameters Table */
252	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
253	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
254				 table->ops->unpacked_entry_size, GFP_KERNEL);
255	if (!table->entries)
256		return -ENOMEM;
257	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
258	schedule_params = table->entries;
259
260	/* Schedule Entry Points Table */
261	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
262	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
263				 GFP_KERNEL);
264	if (!table->entries)
265		return -ENOMEM;
266	table->entry_count = num_cycles;
267	schedule_entry_points = table->entries;
268
269	/* Finally start populating the static config tables */
270	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
271	schedule_entry_points_params->actsubsch = num_cycles - 1;
272
273	for (port = 0; port < ds->num_ports; port++) {
274		const struct tc_taprio_qopt_offload *offload;
275		/* Relative base time */
276		s64 rbt;
277
278		offload = tas_data->offload[port];
279		if (!offload)
280			continue;
281
282		schedule_start_idx = k;
283		schedule_end_idx = k + offload->num_entries - 1;
284		/* This is the base time expressed as a number of TAS ticks
285		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
286		 * the operational base time.
287		 */
288		rbt = future_base_time(offload->base_time,
289				       offload->cycle_time,
290				       tas_data->earliest_base_time);
291		rbt -= tas_data->earliest_base_time;
292		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
293		 * delta cannot be zero, which is shitty. Advance all relative
294		 * base times by 1 TAS delta, so that even the earliest base
295		 * time becomes 1 in relative terms. Then start the operational
296		 * base time (PTPSCHTM) one TAS delta earlier than planned.
297		 */
298		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
299
300		schedule_entry_points[cycle].subschindx = cycle;
301		schedule_entry_points[cycle].delta = entry_point_delta;
302		schedule_entry_points[cycle].address = schedule_start_idx;
303
304		/* The subschedule end indices need to be
305		 * monotonically increasing.
306		 */
307		for (i = cycle; i < 8; i++)
308			schedule_params->subscheind[i] = schedule_end_idx;
309
310		for (i = 0; i < offload->num_entries; i++, k++) {
311			s64 delta_ns = offload->entries[i].interval;
312
313			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
314			schedule[k].destports = BIT(port);
315			schedule[k].resmedia_en = true;
316			schedule[k].resmedia = SJA1105_GATE_MASK &
317					~offload->entries[i].gate_mask;
318		}
319		cycle++;
320	}
321
322	if (!list_empty(&gating_cfg->entries)) {
323		struct sja1105_gate_entry *e;
324
325		/* Relative base time */
326		s64 rbt;
327
328		schedule_start_idx = k;
329		schedule_end_idx = k + gating_cfg->num_entries - 1;
330		rbt = future_base_time(gating_cfg->base_time,
331				       gating_cfg->cycle_time,
332				       tas_data->earliest_base_time);
333		rbt -= tas_data->earliest_base_time;
334		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
335
336		schedule_entry_points[cycle].subschindx = cycle;
337		schedule_entry_points[cycle].delta = entry_point_delta;
338		schedule_entry_points[cycle].address = schedule_start_idx;
339
340		for (i = cycle; i < 8; i++)
341			schedule_params->subscheind[i] = schedule_end_idx;
342
343		list_for_each_entry(e, &gating_cfg->entries, list) {
344			schedule[k].delta = ns_to_sja1105_delta(e->interval);
345			schedule[k].destports = e->rule->vl.destports;
346			schedule[k].setvalid = true;
347			schedule[k].txen = true;
348			schedule[k].vlindex = e->rule->vl.sharindx;
349			schedule[k].winstindex = e->rule->vl.sharindx;
350			if (e->gate_state) /* Gate open */
351				schedule[k].winst = true;
352			else /* Gate closed */
353				schedule[k].winend = true;
354			k++;
355		}
356	}
357
358	return 0;
359}
360
361/* Be there 2 port subschedules, each executing an arbitrary number of gate
362 * open/close events cyclically.
363 * None of those gate events must ever occur at the exact same time, otherwise
364 * the switch is known to act in exotically strange ways.
365 * However the hardware doesn't bother performing these integrity checks.
366 * So here we are with the task of validating whether the new @admin offload
367 * has any conflict with the already established TAS configuration in
368 * tas_data->offload.  We already know the other ports are in harmony with one
369 * another, otherwise we wouldn't have saved them.
370 * Each gate event executes periodically, with a period of @cycle_time and a
371 * phase given by its cycle's @base_time plus its offset within the cycle
372 * (which in turn is given by the length of the events prior to it).
373 * There are two aspects to possible collisions:
374 * - Collisions within one cycle's (actually the longest cycle's) time frame.
375 *   For that, we need to compare the cartesian product of each possible
376 *   occurrence of each event within one cycle time.
377 * - Collisions in the future. Events may not collide within one cycle time,
378 *   but if two port schedules don't have the same periodicity (aka the cycle
379 *   times aren't multiples of one another), they surely will some time in the
380 *   future (actually they will collide an infinite amount of times).
381 */
382static bool
383sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
384			    const struct tc_taprio_qopt_offload *admin)
385{
386	struct sja1105_tas_data *tas_data = &priv->tas_data;
387	const struct tc_taprio_qopt_offload *offload;
388	s64 max_cycle_time, min_cycle_time;
389	s64 delta1, delta2;
390	s64 rbt1, rbt2;
391	s64 stop_time;
392	s64 t1, t2;
393	int i, j;
394	s32 rem;
395
396	offload = tas_data->offload[port];
397	if (!offload)
398		return false;
399
400	/* Check if the two cycle times are multiples of one another.
401	 * If they aren't, then they will surely collide.
402	 */
403	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
404	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
405	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
406	if (rem)
407		return true;
408
409	/* Calculate the "reduced" base time of each of the two cycles
410	 * (transposed back as close to 0 as possible) by dividing to
411	 * the cycle time.
412	 */
413	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
414	rbt1 = rem;
415
416	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
417	rbt2 = rem;
418
419	stop_time = max_cycle_time + max(rbt1, rbt2);
420
421	/* delta1 is the relative base time of each GCL entry within
422	 * the established ports' TAS config.
423	 */
424	for (i = 0, delta1 = 0;
425	     i < offload->num_entries;
426	     delta1 += offload->entries[i].interval, i++) {
427		/* delta2 is the relative base time of each GCL entry
428		 * within the newly added TAS config.
429		 */
430		for (j = 0, delta2 = 0;
431		     j < admin->num_entries;
432		     delta2 += admin->entries[j].interval, j++) {
433			/* t1 follows all possible occurrences of the
434			 * established ports' GCL entry i within the
435			 * first cycle time.
436			 */
437			for (t1 = rbt1 + delta1;
438			     t1 <= stop_time;
439			     t1 += offload->cycle_time) {
440				/* t2 follows all possible occurrences
441				 * of the newly added GCL entry j
442				 * within the first cycle time.
443				 */
444				for (t2 = rbt2 + delta2;
445				     t2 <= stop_time;
446				     t2 += admin->cycle_time) {
447					if (t1 == t2) {
448						dev_warn(priv->ds->dev,
449							 "GCL entry %d collides with entry %d of port %d\n",
450							 j, i, port);
451						return true;
452					}
453				}
454			}
455		}
456	}
457
458	return false;
459}
460
461/* Check the tc-taprio configuration on @port for conflicts with the tc-gate
462 * global subschedule. If @port is -1, check it against all ports.
463 * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
464 * convert the gating configuration to a dummy tc-taprio offload structure.
465 */
466bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
467				    struct netlink_ext_ack *extack)
468{
469	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
470	size_t num_entries = gating_cfg->num_entries;
471	struct tc_taprio_qopt_offload *dummy;
472	struct dsa_switch *ds = priv->ds;
473	struct sja1105_gate_entry *e;
474	bool conflict;
475	int i = 0;
476
477	if (list_empty(&gating_cfg->entries))
478		return false;
479
480	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
481	if (!dummy) {
482		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
483		return true;
484	}
485
486	dummy->num_entries = num_entries;
487	dummy->base_time = gating_cfg->base_time;
488	dummy->cycle_time = gating_cfg->cycle_time;
489
490	list_for_each_entry(e, &gating_cfg->entries, list)
491		dummy->entries[i++].interval = e->interval;
492
493	if (port != -1) {
494		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
495	} else {
496		for (port = 0; port < ds->num_ports; port++) {
497			conflict = sja1105_tas_check_conflicts(priv, port,
498							       dummy);
499			if (conflict)
500				break;
501		}
502	}
503
504	kfree(dummy);
505
506	return conflict;
507}
508
509int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
510			    struct tc_taprio_qopt_offload *admin)
511{
512	struct sja1105_private *priv = ds->priv;
513	struct sja1105_tas_data *tas_data = &priv->tas_data;
514	int other_port, rc, i;
515
516	/* Can't change an already configured port (must delete qdisc first).
517	 * Can't delete the qdisc from an unconfigured port.
518	 */
519	if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) ||
520	    (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY))
521		return -EINVAL;
522
523	if (admin->cmd == TAPRIO_CMD_DESTROY) {
524		taprio_offload_free(tas_data->offload[port]);
525		tas_data->offload[port] = NULL;
526
527		rc = sja1105_init_scheduling(priv);
528		if (rc < 0)
529			return rc;
530
531		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
532	} else if (admin->cmd != TAPRIO_CMD_REPLACE) {
533		return -EOPNOTSUPP;
534	}
535
536	/* The cycle time extension is the amount of time the last cycle from
537	 * the old OPER needs to be extended in order to phase-align with the
538	 * base time of the ADMIN when that becomes the new OPER.
539	 * But of course our switch needs to be reset to switch-over between
540	 * the ADMIN and the OPER configs - so much for a seamless transition.
541	 * So don't add insult over injury and just say we don't support cycle
542	 * time extension.
543	 */
544	if (admin->cycle_time_extension)
545		return -ENOTSUPP;
546
547	for (i = 0; i < admin->num_entries; i++) {
548		s64 delta_ns = admin->entries[i].interval;
549		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
550		bool too_long, too_short;
551
552		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
553		too_short = (delta_cycles == 0);
554		if (too_long || too_short) {
555			dev_err(priv->ds->dev,
556				"Interval %llu too %s for GCL entry %d\n",
557				delta_ns, too_long ? "long" : "short", i);
558			return -ERANGE;
559		}
560	}
561
562	for (other_port = 0; other_port < ds->num_ports; other_port++) {
563		if (other_port == port)
564			continue;
565
566		if (sja1105_tas_check_conflicts(priv, other_port, admin))
567			return -ERANGE;
568	}
569
570	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
571		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
572		return -ERANGE;
573	}
574
575	tas_data->offload[port] = taprio_offload_get(admin);
576
577	rc = sja1105_init_scheduling(priv);
578	if (rc < 0)
579		return rc;
580
581	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
582}
583
584static int sja1105_tas_check_running(struct sja1105_private *priv)
585{
586	struct sja1105_tas_data *tas_data = &priv->tas_data;
587	struct dsa_switch *ds = priv->ds;
588	struct sja1105_ptp_cmd cmd = {0};
589	int rc;
590
591	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
592	if (rc < 0)
593		return rc;
594
595	if (cmd.ptpstrtsch == 1)
596		/* Schedule successfully started */
597		tas_data->state = SJA1105_TAS_STATE_RUNNING;
598	else if (cmd.ptpstopsch == 1)
599		/* Schedule is stopped */
600		tas_data->state = SJA1105_TAS_STATE_DISABLED;
601	else
602		/* Schedule is probably not configured with PTP clock source */
603		rc = -EINVAL;
604
605	return rc;
606}
607
608/* Write to PTPCLKCORP */
609static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
610				    u64 correction)
611{
612	const struct sja1105_regs *regs = priv->info->regs;
613	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
614
615	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
616				&ptpclkcorp, NULL);
617}
618
619/* Write to PTPSCHTM */
620static int sja1105_tas_set_base_time(struct sja1105_private *priv,
621				     u64 base_time)
622{
623	const struct sja1105_regs *regs = priv->info->regs;
624	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
625
626	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
627				&ptpschtm, NULL);
628}
629
630static int sja1105_tas_start(struct sja1105_private *priv)
631{
632	struct sja1105_tas_data *tas_data = &priv->tas_data;
633	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
634	struct dsa_switch *ds = priv->ds;
635	int rc;
636
637	dev_dbg(ds->dev, "Starting the TAS\n");
638
639	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
640	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
641		dev_err(ds->dev, "TAS already started\n");
642		return -EINVAL;
643	}
644
645	cmd->ptpstrtsch = 1;
646	cmd->ptpstopsch = 0;
647
648	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
649	if (rc < 0)
650		return rc;
651
652	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
653
654	return 0;
655}
656
657static int sja1105_tas_stop(struct sja1105_private *priv)
658{
659	struct sja1105_tas_data *tas_data = &priv->tas_data;
660	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
661	struct dsa_switch *ds = priv->ds;
662	int rc;
663
664	dev_dbg(ds->dev, "Stopping the TAS\n");
665
666	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
667		dev_err(ds->dev, "TAS already disabled\n");
668		return -EINVAL;
669	}
670
671	cmd->ptpstopsch = 1;
672	cmd->ptpstrtsch = 0;
673
674	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
675	if (rc < 0)
676		return rc;
677
678	tas_data->state = SJA1105_TAS_STATE_DISABLED;
679
680	return 0;
681}
682
683/* The schedule engine and the PTP clock are driven by the same oscillator, and
684 * they run in parallel. But whilst the PTP clock can keep an absolute
685 * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
686 * up a delta, which is 200ns), and wrapping around at the end of each cycle.
687 * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
688 * (in PTP domain).
689 * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
690 * a software servo, and the schedule engine clock runs in parallel to the PTP
691 * clock, there is logic internal to the switch that periodically keeps the
692 * schedule engine from drifting away. The frequency with which this internal
693 * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
694 * a value also in the PTP clock domain, and is also rate-corrected.
695 * To be precise, during a correction period, there is logic to determine by
696 * how many scheduler clock ticks has the PTP clock drifted. At the end of each
697 * correction period/beginning of new one, the length of a delta is shrunk or
698 * expanded with an integer number of ticks, compared with the typical 25.
699 * So a delta lasts for 200ns (or 25 ticks) only on average.
700 * Sometimes it is longer, sometimes it is shorter. The internal syntonization
701 * logic can adjust for at most 5 ticks each 20 ticks.
702 *
703 * The first implication is that you should choose your schedule correction
704 * period to be an integer multiple of the schedule length. Preferably one.
705 * In case there are schedules of multiple ports active, then the correction
706 * period needs to be a multiple of them all. Given the restriction that the
707 * cycle times have to be multiples of one another anyway, this means the
708 * correction period can simply be the largest cycle time, hence the current
709 * choice. This way, the updates are always synchronous to the transmission
710 * cycle, and therefore predictable.
711 *
712 * The second implication is that at the beginning of a correction period, the
713 * first few deltas will be modulated in time, until the schedule engine is
714 * properly phase-aligned with the PTP clock. For this reason, you should place
715 * your best-effort traffic at the beginning of a cycle, and your
716 * time-triggered traffic afterwards.
717 *
718 * The third implication is that once the schedule engine is started, it can
719 * only adjust for so much drift within a correction period. In the servo you
720 * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
721 * want to do the latter, you need to stop and restart the schedule engine,
722 * which is what the state machine handles.
723 */
724static void sja1105_tas_state_machine(struct work_struct *work)
725{
726	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
727	struct sja1105_private *priv = tas_to_sja1105(tas_data);
728	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
729	struct timespec64 base_time_ts, now_ts;
730	struct dsa_switch *ds = priv->ds;
731	struct timespec64 diff;
732	s64 base_time, now;
733	int rc = 0;
734
735	mutex_lock(&ptp_data->lock);
736
737	switch (tas_data->state) {
738	case SJA1105_TAS_STATE_DISABLED:
739		/* Can't do anything at all if clock is still being stepped */
740		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
741			break;
742
743		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
744		if (rc < 0)
745			break;
746
747		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
748		if (rc < 0)
749			break;
750
751		/* Plan to start the earliest schedule first. The others
752		 * will be started in hardware, by way of their respective
753		 * entry points delta.
754		 * Try our best to avoid fringe cases (race condition between
755		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
756		 * least one second in the future from now. This is not ideal,
757		 * but this only needs to buy us time until the
758		 * sja1105_tas_start command below gets executed.
759		 */
760		base_time = future_base_time(tas_data->earliest_base_time,
761					     tas_data->max_cycle_time,
762					     now + 1ull * NSEC_PER_SEC);
763		base_time -= sja1105_delta_to_ns(1);
764
765		rc = sja1105_tas_set_base_time(priv, base_time);
766		if (rc < 0)
767			break;
768
769		tas_data->oper_base_time = base_time;
770
771		rc = sja1105_tas_start(priv);
772		if (rc < 0)
773			break;
774
775		base_time_ts = ns_to_timespec64(base_time);
776		now_ts = ns_to_timespec64(now);
777
778		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
779			base_time_ts.tv_sec, base_time_ts.tv_nsec,
780			now_ts.tv_sec, now_ts.tv_nsec);
781
782		break;
783
784	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
785		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
786			/* Clock was stepped.. bad news for TAS */
787			sja1105_tas_stop(priv);
788			break;
789		}
790
791		/* Check if TAS has actually started, by comparing the
792		 * scheduled start time with the SJA1105 PTP clock
793		 */
794		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
795		if (rc < 0)
796			break;
797
798		if (now < tas_data->oper_base_time) {
799			/* TAS has not started yet */
800			diff = ns_to_timespec64(tas_data->oper_base_time - now);
801			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
802				diff.tv_sec, diff.tv_nsec);
803			break;
804		}
805
806		/* Time elapsed, what happened? */
807		rc = sja1105_tas_check_running(priv);
808		if (rc < 0)
809			break;
810
811		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
812			/* TAS has started */
813			dev_err(ds->dev,
814				"TAS not started despite time elapsed\n");
815
816		break;
817
818	case SJA1105_TAS_STATE_RUNNING:
819		/* Clock was stepped.. bad news for TAS */
820		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
821			sja1105_tas_stop(priv);
822			break;
823		}
824
825		rc = sja1105_tas_check_running(priv);
826		if (rc < 0)
827			break;
828
829		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
830			dev_err(ds->dev, "TAS surprisingly stopped\n");
831
832		break;
833
834	default:
835		if (net_ratelimit())
836			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
837	}
838
839	if (rc && net_ratelimit())
840		dev_err(ds->dev, "An operation returned %d\n", rc);
841
842	mutex_unlock(&ptp_data->lock);
843}
844
845void sja1105_tas_clockstep(struct dsa_switch *ds)
846{
847	struct sja1105_private *priv = ds->priv;
848	struct sja1105_tas_data *tas_data = &priv->tas_data;
849
850	if (!tas_data->enabled)
851		return;
852
853	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
854	schedule_work(&tas_data->tas_work);
855}
856
857void sja1105_tas_adjfreq(struct dsa_switch *ds)
858{
859	struct sja1105_private *priv = ds->priv;
860	struct sja1105_tas_data *tas_data = &priv->tas_data;
861
862	if (!tas_data->enabled)
863		return;
864
865	/* No reason to schedule the workqueue, nothing changed */
866	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
867		return;
868
869	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
870	schedule_work(&tas_data->tas_work);
871}
872
873void sja1105_tas_setup(struct dsa_switch *ds)
874{
875	struct sja1105_private *priv = ds->priv;
876	struct sja1105_tas_data *tas_data = &priv->tas_data;
877
878	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
879	tas_data->state = SJA1105_TAS_STATE_DISABLED;
880	tas_data->last_op = SJA1105_PTP_NONE;
881
882	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
883}
884
885void sja1105_tas_teardown(struct dsa_switch *ds)
886{
887	struct sja1105_private *priv = ds->priv;
888	struct tc_taprio_qopt_offload *offload;
889	int port;
890
891	cancel_work_sync(&priv->tas_data.tas_work);
892
893	for (port = 0; port < ds->num_ports; port++) {
894		offload = priv->tas_data.offload[port];
895		if (!offload)
896			continue;
897
898		taprio_offload_free(offload);
899	}
900}

  1// SPDX-License-Identifier: GPL-2.0
  2/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
  3 */
  4#include "sja1105.h"
  5
  /* TAS clock source selectors, as written to the clksrc field */
#define SJA1105_TAS_CLKSRC_DISABLED	0
#define SJA1105_TAS_CLKSRC_STANDALONE	1
#define SJA1105_TAS_CLKSRC_AS6802	2
#define SJA1105_TAS_CLKSRC_PTP		3
/* One bit per traffic class */
#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* From the embedded work item to the TAS data that contains it */
#define work_to_sja1105_tas(ptr) \
	container_of((ptr), struct sja1105_tas_data, tas_work)
/* From the embedded TAS data to the driver private data that contains it */
#define tas_to_sja1105(ptr) \
	container_of((ptr), struct sja1105_private, tas_data)
 16
 17static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
 18{
 19	struct sja1105_tas_data *tas_data = &priv->tas_data;
 20	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
 21	struct dsa_switch *ds = priv->ds;
 22	s64 earliest_base_time = S64_MAX;
 23	s64 latest_base_time = 0;
 24	s64 its_cycle_time = 0;
 25	s64 max_cycle_time = 0;
 26	int port;
 27
 28	tas_data->enabled = false;
 29
 30	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
 31		const struct tc_taprio_qopt_offload *offload;
 32
 33		offload = tas_data->offload[port];
 34		if (!offload)
 35			continue;
 36
 37		tas_data->enabled = true;
 38
 39		if (max_cycle_time < offload->cycle_time)
 40			max_cycle_time = offload->cycle_time;
 41		if (latest_base_time < offload->base_time)
 42			latest_base_time = offload->base_time;
 43		if (earliest_base_time > offload->base_time) {
 44			earliest_base_time = offload->base_time;
 45			its_cycle_time = offload->cycle_time;
 46		}
 47	}
 48
 49	if (!list_empty(&gating_cfg->entries)) {
 50		tas_data->enabled = true;
 51
 52		if (max_cycle_time < gating_cfg->cycle_time)
 53			max_cycle_time = gating_cfg->cycle_time;
 54		if (latest_base_time < gating_cfg->base_time)
 55			latest_base_time = gating_cfg->base_time;
 56		if (earliest_base_time > gating_cfg->base_time) {
 57			earliest_base_time = gating_cfg->base_time;
 58			its_cycle_time = gating_cfg->cycle_time;
 59		}
 60	}
 61
 62	if (!tas_data->enabled)
 63		return 0;
 64
 65	/* Roll the earliest base time over until it is in a comparable
 66	 * time base with the latest, then compare their deltas.
 67	 * We want to enforce that all ports' base times are within
 68	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
 69	 */
 70	earliest_base_time = future_base_time(earliest_base_time,
 71					      its_cycle_time,
 72					      latest_base_time);
 73	while (earliest_base_time > latest_base_time)
 74		earliest_base_time -= its_cycle_time;
 75	if (latest_base_time - earliest_base_time >
 76	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
 77		dev_err(ds->dev,
 78			"Base times too far apart: min %llu max %llu\n",
 79			earliest_base_time, latest_base_time);
 80		return -ERANGE;
 81	}
 82
 83	tas_data->earliest_base_time = earliest_base_time;
 84	tas_data->max_cycle_time = max_cycle_time;
 85
 86	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
 87	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
 88	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
 89
 90	return 0;
 91}
 92
 93/* Lo and behold: the egress scheduler from hell.
 94 *
 95 * At the hardware level, the Time-Aware Shaper holds a global linear array of
 96 * all schedule entries for all ports. These are the Gate Control List (GCL)
 97 * entries, let's call them "timeslots" for short. This linear array of
 98 * timeslots is held in BLK_IDX_SCHEDULE.
 99 *
100 * Then there are a maximum of 8 "execution threads" inside the switch, which
101 * iterate cyclically through the "schedule". Each "cycle" has an entry point
102 * and an exit point, both being timeslot indices in the schedule table. The
103 * hardware calls each cycle a "subschedule".
104 *
105 * Subschedule (cycle) i starts when
106 *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
107 *
108 * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
109 *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
110 *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
111 *
112 * For each schedule entry (timeslot) k, the engine executes the gate control
113 * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
114 *
115 *         +---------+
116 *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
117 *         +---------+
118 *              |
119 *              +-----------------+
120 *                                | .actsubsch
121 *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
122 *                 +-------+-------+
123 *                 |cycle 0|cycle 1|
124 *                 +-------+-------+
125 *                   |  |      |  |
126 *  +----------------+  |      |  +-------------------------------------+
127 *  |   .subschindx     |      |             .subschindx                |
128 *  |                   |      +---------------+                        |
129 *  |          .address |        .address      |                        |
130 *  |                   |                      |                        |
131 *  |                   |                      |                        |
132 *  |  BLK_IDX_SCHEDULE v                      v                        |
133 *  |              +-------+-------+-------+-------+-------+------+     |
134 *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
135 *  |              +-------+-------+-------+-------+-------+------+     |
136 *  |                                  ^                    ^  ^  ^     |
137 *  |                                  |                    |  |  |     |
138 *  |        +-------------------------+                    |  |  |     |
139 *  |        |              +-------------------------------+  |  |     |
140 *  |        |              |              +-------------------+  |     |
141 *  |        |              |              |                      |     |
142 *  | +---------------------------------------------------------------+ |
143 *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
144 *  | +---------------------------------------------------------------+ |
145 *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
146 *  |        |              |                                           |
147 *  +--------+              +-------------------------------------------+
148 *
149 *  In the above picture there are two subschedules (cycles):
150 *
151 *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
152 *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
153 *
154 *  All other possible execution threads must be marked as unused by making
155 *  their "subschedule end index" (subscheind) equal to the last valid
156 *  subschedule's end index (in this case 5).
157 */
158int sja1105_init_scheduling(struct sja1105_private *priv)
159{
160	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
161	struct sja1105_schedule_entry_points_params_entry
162					*schedule_entry_points_params;
163	struct sja1105_schedule_params_entry *schedule_params;
164	struct sja1105_tas_data *tas_data = &priv->tas_data;
165	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
166	struct sja1105_schedule_entry *schedule;
 
167	struct sja1105_table *table;
168	int schedule_start_idx;
169	s64 entry_point_delta;
170	int schedule_end_idx;
171	int num_entries = 0;
172	int num_cycles = 0;
173	int cycle = 0;
174	int i, k = 0;
175	int port, rc;
176
177	rc = sja1105_tas_set_runtime_params(priv);
178	if (rc < 0)
179		return rc;
180
181	/* Discard previous Schedule Table */
182	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
183	if (table->entry_count) {
184		kfree(table->entries);
185		table->entry_count = 0;
186	}
187
188	/* Discard previous Schedule Entry Points Parameters Table */
189	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
190	if (table->entry_count) {
191		kfree(table->entries);
192		table->entry_count = 0;
193	}
194
195	/* Discard previous Schedule Parameters Table */
196	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
197	if (table->entry_count) {
198		kfree(table->entries);
199		table->entry_count = 0;
200	}
201
202	/* Discard previous Schedule Entry Points Table */
203	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
204	if (table->entry_count) {
205		kfree(table->entries);
206		table->entry_count = 0;
207	}
208
209	/* Figure out the dimensioning of the problem */
210	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
211		if (tas_data->offload[port]) {
212			num_entries += tas_data->offload[port]->num_entries;
213			num_cycles++;
214		}
215	}
216
217	if (!list_empty(&gating_cfg->entries)) {
218		num_entries += gating_cfg->num_entries;
219		num_cycles++;
220	}
221
222	/* Nothing to do */
223	if (!num_cycles)
224		return 0;
225
226	/* Pre-allocate space in the static config tables */
227
228	/* Schedule Table */
229	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
230	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
231				 GFP_KERNEL);
232	if (!table->entries)
233		return -ENOMEM;
234	table->entry_count = num_entries;
235	schedule = table->entries;
236
237	/* Schedule Points Parameters Table */
238	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
239	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
240				 table->ops->unpacked_entry_size, GFP_KERNEL);
241	if (!table->entries)
242		/* Previously allocated memory will be freed automatically in
243		 * sja1105_static_config_free. This is true for all early
244		 * returns below.
245		 */
246		return -ENOMEM;
247	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
248	schedule_entry_points_params = table->entries;
249
250	/* Schedule Parameters Table */
251	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
252	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
253				 table->ops->unpacked_entry_size, GFP_KERNEL);
254	if (!table->entries)
255		return -ENOMEM;
256	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
257	schedule_params = table->entries;
258
259	/* Schedule Entry Points Table */
260	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
261	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
262				 GFP_KERNEL);
263	if (!table->entries)
264		return -ENOMEM;
265	table->entry_count = num_cycles;
266	schedule_entry_points = table->entries;
267
268	/* Finally start populating the static config tables */
269	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
270	schedule_entry_points_params->actsubsch = num_cycles - 1;
271
272	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
273		const struct tc_taprio_qopt_offload *offload;
274		/* Relative base time */
275		s64 rbt;
276
277		offload = tas_data->offload[port];
278		if (!offload)
279			continue;
280
281		schedule_start_idx = k;
282		schedule_end_idx = k + offload->num_entries - 1;
283		/* This is the base time expressed as a number of TAS ticks
284		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
285		 * the operational base time.
286		 */
287		rbt = future_base_time(offload->base_time,
288				       offload->cycle_time,
289				       tas_data->earliest_base_time);
290		rbt -= tas_data->earliest_base_time;
291		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
292		 * delta cannot be zero, which is shitty. Advance all relative
293		 * base times by 1 TAS delta, so that even the earliest base
294		 * time becomes 1 in relative terms. Then start the operational
295		 * base time (PTPSCHTM) one TAS delta earlier than planned.
296		 */
297		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
298
299		schedule_entry_points[cycle].subschindx = cycle;
300		schedule_entry_points[cycle].delta = entry_point_delta;
301		schedule_entry_points[cycle].address = schedule_start_idx;
302
303		/* The subschedule end indices need to be
304		 * monotonically increasing.
305		 */
306		for (i = cycle; i < 8; i++)
307			schedule_params->subscheind[i] = schedule_end_idx;
308
309		for (i = 0; i < offload->num_entries; i++, k++) {
310			s64 delta_ns = offload->entries[i].interval;
311
312			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
313			schedule[k].destports = BIT(port);
314			schedule[k].resmedia_en = true;
315			schedule[k].resmedia = SJA1105_GATE_MASK &
316					~offload->entries[i].gate_mask;
317		}
318		cycle++;
319	}
320
321	if (!list_empty(&gating_cfg->entries)) {
322		struct sja1105_gate_entry *e;
323
324		/* Relative base time */
325		s64 rbt;
326
327		schedule_start_idx = k;
328		schedule_end_idx = k + gating_cfg->num_entries - 1;
329		rbt = future_base_time(gating_cfg->base_time,
330				       gating_cfg->cycle_time,
331				       tas_data->earliest_base_time);
332		rbt -= tas_data->earliest_base_time;
333		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
334
335		schedule_entry_points[cycle].subschindx = cycle;
336		schedule_entry_points[cycle].delta = entry_point_delta;
337		schedule_entry_points[cycle].address = schedule_start_idx;
338
339		for (i = cycle; i < 8; i++)
340			schedule_params->subscheind[i] = schedule_end_idx;
341
342		list_for_each_entry(e, &gating_cfg->entries, list) {
343			schedule[k].delta = ns_to_sja1105_delta(e->interval);
344			schedule[k].destports = e->rule->vl.destports;
345			schedule[k].setvalid = true;
346			schedule[k].txen = true;
347			schedule[k].vlindex = e->rule->vl.sharindx;
348			schedule[k].winstindex = e->rule->vl.sharindx;
349			if (e->gate_state) /* Gate open */
350				schedule[k].winst = true;
351			else /* Gate closed */
352				schedule[k].winend = true;
353			k++;
354		}
355	}
356
357	return 0;
358}
359
360/* Be there 2 port subschedules, each executing an arbitrary number of gate
361 * open/close events cyclically.
362 * None of those gate events must ever occur at the exact same time, otherwise
363 * the switch is known to act in exotically strange ways.
364 * However the hardware doesn't bother performing these integrity checks.
365 * So here we are with the task of validating whether the new @admin offload
366 * has any conflict with the already established TAS configuration in
367 * tas_data->offload.  We already know the other ports are in harmony with one
368 * another, otherwise we wouldn't have saved them.
369 * Each gate event executes periodically, with a period of @cycle_time and a
370 * phase given by its cycle's @base_time plus its offset within the cycle
371 * (which in turn is given by the length of the events prior to it).
372 * There are two aspects to possible collisions:
373 * - Collisions within one cycle's (actually the longest cycle's) time frame.
374 *   For that, we need to compare the cartesian product of each possible
375 *   occurrence of each event within one cycle time.
376 * - Collisions in the future. Events may not collide within one cycle time,
377 *   but if two port schedules don't have the same periodicity (aka the cycle
378 *   times aren't multiples of one another), they surely will some time in the
379 *   future (actually they will collide an infinite amount of times).
380 */
381static bool
382sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
383			    const struct tc_taprio_qopt_offload *admin)
384{
385	struct sja1105_tas_data *tas_data = &priv->tas_data;
386	const struct tc_taprio_qopt_offload *offload;
387	s64 max_cycle_time, min_cycle_time;
388	s64 delta1, delta2;
389	s64 rbt1, rbt2;
390	s64 stop_time;
391	s64 t1, t2;
392	int i, j;
393	s32 rem;
394
395	offload = tas_data->offload[port];
396	if (!offload)
397		return false;
398
399	/* Check if the two cycle times are multiples of one another.
400	 * If they aren't, then they will surely collide.
401	 */
402	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
403	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
404	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
405	if (rem)
406		return true;
407
408	/* Calculate the "reduced" base time of each of the two cycles
409	 * (transposed back as close to 0 as possible) by dividing to
410	 * the cycle time.
411	 */
412	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
413	rbt1 = rem;
414
415	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
416	rbt2 = rem;
417
418	stop_time = max_cycle_time + max(rbt1, rbt2);
419
420	/* delta1 is the relative base time of each GCL entry within
421	 * the established ports' TAS config.
422	 */
423	for (i = 0, delta1 = 0;
424	     i < offload->num_entries;
425	     delta1 += offload->entries[i].interval, i++) {
426		/* delta2 is the relative base time of each GCL entry
427		 * within the newly added TAS config.
428		 */
429		for (j = 0, delta2 = 0;
430		     j < admin->num_entries;
431		     delta2 += admin->entries[j].interval, j++) {
432			/* t1 follows all possible occurrences of the
433			 * established ports' GCL entry i within the
434			 * first cycle time.
435			 */
436			for (t1 = rbt1 + delta1;
437			     t1 <= stop_time;
438			     t1 += offload->cycle_time) {
439				/* t2 follows all possible occurrences
440				 * of the newly added GCL entry j
441				 * within the first cycle time.
442				 */
443				for (t2 = rbt2 + delta2;
444				     t2 <= stop_time;
445				     t2 += admin->cycle_time) {
446					if (t1 == t2) {
447						dev_warn(priv->ds->dev,
448							 "GCL entry %d collides with entry %d of port %d\n",
449							 j, i, port);
450						return true;
451					}
452				}
453			}
454		}
455	}
456
457	return false;
458}
459
460/* Check the tc-taprio configuration on @port for conflicts with the tc-gate
461 * global subschedule. If @port is -1, check it against all ports.
462 * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
463 * convert the gating configuration to a dummy tc-taprio offload structure.
464 */
465bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
466				    struct netlink_ext_ack *extack)
467{
468	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
469	size_t num_entries = gating_cfg->num_entries;
470	struct tc_taprio_qopt_offload *dummy;
 
471	struct sja1105_gate_entry *e;
472	bool conflict;
473	int i = 0;
474
475	if (list_empty(&gating_cfg->entries))
476		return false;
477
478	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
479	if (!dummy) {
480		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
481		return true;
482	}
483
484	dummy->num_entries = num_entries;
485	dummy->base_time = gating_cfg->base_time;
486	dummy->cycle_time = gating_cfg->cycle_time;
487
488	list_for_each_entry(e, &gating_cfg->entries, list)
489		dummy->entries[i++].interval = e->interval;
490
491	if (port != -1) {
492		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
493	} else {
494		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
495			conflict = sja1105_tas_check_conflicts(priv, port,
496							       dummy);
497			if (conflict)
498				break;
499		}
500	}
501
502	kfree(dummy);
503
504	return conflict;
505}
506
507int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
508			    struct tc_taprio_qopt_offload *admin)
509{
510	struct sja1105_private *priv = ds->priv;
511	struct sja1105_tas_data *tas_data = &priv->tas_data;
512	int other_port, rc, i;
513
514	/* Can't change an already configured port (must delete qdisc first).
515	 * Can't delete the qdisc from an unconfigured port.
516	 */
517	if (!!tas_data->offload[port] == admin->enable)
 
518		return -EINVAL;
519
520	if (!admin->enable) {
521		taprio_offload_free(tas_data->offload[port]);
522		tas_data->offload[port] = NULL;
523
524		rc = sja1105_init_scheduling(priv);
525		if (rc < 0)
526			return rc;
527
528		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
 
 
529	}
530
531	/* The cycle time extension is the amount of time the last cycle from
532	 * the old OPER needs to be extended in order to phase-align with the
533	 * base time of the ADMIN when that becomes the new OPER.
534	 * But of course our switch needs to be reset to switch-over between
535	 * the ADMIN and the OPER configs - so much for a seamless transition.
536	 * So don't add insult over injury and just say we don't support cycle
537	 * time extension.
538	 */
539	if (admin->cycle_time_extension)
540		return -ENOTSUPP;
541
542	for (i = 0; i < admin->num_entries; i++) {
543		s64 delta_ns = admin->entries[i].interval;
544		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
545		bool too_long, too_short;
546
547		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
548		too_short = (delta_cycles == 0);
549		if (too_long || too_short) {
550			dev_err(priv->ds->dev,
551				"Interval %llu too %s for GCL entry %d\n",
552				delta_ns, too_long ? "long" : "short", i);
553			return -ERANGE;
554		}
555	}
556
557	for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
558		if (other_port == port)
559			continue;
560
561		if (sja1105_tas_check_conflicts(priv, other_port, admin))
562			return -ERANGE;
563	}
564
565	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
566		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
567		return -ERANGE;
568	}
569
570	tas_data->offload[port] = taprio_offload_get(admin);
571
572	rc = sja1105_init_scheduling(priv);
573	if (rc < 0)
574		return rc;
575
576	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
577}
578
579static int sja1105_tas_check_running(struct sja1105_private *priv)
580{
581	struct sja1105_tas_data *tas_data = &priv->tas_data;
582	struct dsa_switch *ds = priv->ds;
583	struct sja1105_ptp_cmd cmd = {0};
584	int rc;
585
586	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
587	if (rc < 0)
588		return rc;
589
590	if (cmd.ptpstrtsch == 1)
591		/* Schedule successfully started */
592		tas_data->state = SJA1105_TAS_STATE_RUNNING;
593	else if (cmd.ptpstopsch == 1)
594		/* Schedule is stopped */
595		tas_data->state = SJA1105_TAS_STATE_DISABLED;
596	else
597		/* Schedule is probably not configured with PTP clock source */
598		rc = -EINVAL;
599
600	return rc;
601}
602
603/* Write to PTPCLKCORP */
604static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
605				    u64 correction)
606{
607	const struct sja1105_regs *regs = priv->info->regs;
608	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
609
610	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
611				&ptpclkcorp, NULL);
612}
613
614/* Write to PTPSCHTM */
615static int sja1105_tas_set_base_time(struct sja1105_private *priv,
616				     u64 base_time)
617{
618	const struct sja1105_regs *regs = priv->info->regs;
619	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
620
621	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
622				&ptpschtm, NULL);
623}
624
625static int sja1105_tas_start(struct sja1105_private *priv)
626{
627	struct sja1105_tas_data *tas_data = &priv->tas_data;
628	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
629	struct dsa_switch *ds = priv->ds;
630	int rc;
631
632	dev_dbg(ds->dev, "Starting the TAS\n");
633
634	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
635	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
636		dev_err(ds->dev, "TAS already started\n");
637		return -EINVAL;
638	}
639
640	cmd->ptpstrtsch = 1;
641	cmd->ptpstopsch = 0;
642
643	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
644	if (rc < 0)
645		return rc;
646
647	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
648
649	return 0;
650}
651
652static int sja1105_tas_stop(struct sja1105_private *priv)
653{
654	struct sja1105_tas_data *tas_data = &priv->tas_data;
655	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
656	struct dsa_switch *ds = priv->ds;
657	int rc;
658
659	dev_dbg(ds->dev, "Stopping the TAS\n");
660
661	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
662		dev_err(ds->dev, "TAS already disabled\n");
663		return -EINVAL;
664	}
665
666	cmd->ptpstopsch = 1;
667	cmd->ptpstrtsch = 0;
668
669	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
670	if (rc < 0)
671		return rc;
672
673	tas_data->state = SJA1105_TAS_STATE_DISABLED;
674
675	return 0;
676}
677
678/* The schedule engine and the PTP clock are driven by the same oscillator, and
679 * they run in parallel. But whilst the PTP clock can keep an absolute
680 * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
681 * up a delta, which is 200ns), and wrapping around at the end of each cycle.
682 * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
683 * (in PTP domain).
684 * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
685 * a software servo, and the schedule engine clock runs in parallel to the PTP
686 * clock, there is logic internal to the switch that periodically keeps the
687 * schedule engine from drifting away. The frequency with which this internal
688 * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
689 * a value also in the PTP clock domain, and is also rate-corrected.
690 * To be precise, during a correction period, there is logic to determine by
691 * how many scheduler clock ticks has the PTP clock drifted. At the end of each
692 * correction period/beginning of new one, the length of a delta is shrunk or
693 * expanded with an integer number of ticks, compared with the typical 25.
694 * So a delta lasts for 200ns (or 25 ticks) only on average.
695 * Sometimes it is longer, sometimes it is shorter. The internal syntonization
696 * logic can adjust for at most 5 ticks each 20 ticks.
697 *
698 * The first implication is that you should choose your schedule correction
699 * period to be an integer multiple of the schedule length. Preferably one.
700 * In case there are schedules of multiple ports active, then the correction
701 * period needs to be a multiple of them all. Given the restriction that the
702 * cycle times have to be multiples of one another anyway, this means the
703 * correction period can simply be the largest cycle time, hence the current
704 * choice. This way, the updates are always synchronous to the transmission
705 * cycle, and therefore predictable.
706 *
707 * The second implication is that at the beginning of a correction period, the
708 * first few deltas will be modulated in time, until the schedule engine is
709 * properly phase-aligned with the PTP clock. For this reason, you should place
710 * your best-effort traffic at the beginning of a cycle, and your
711 * time-triggered traffic afterwards.
712 *
713 * The third implication is that once the schedule engine is started, it can
714 * only adjust for so much drift within a correction period. In the servo you
715 * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
716 * want to do the latter, you need to stop and restart the schedule engine,
717 * which is what the state machine handles.
718 */
719static void sja1105_tas_state_machine(struct work_struct *work)
720{
721	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
722	struct sja1105_private *priv = tas_to_sja1105(tas_data);
723	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
724	struct timespec64 base_time_ts, now_ts;
725	struct dsa_switch *ds = priv->ds;
726	struct timespec64 diff;
727	s64 base_time, now;
728	int rc = 0;
729
730	mutex_lock(&ptp_data->lock);
731
732	switch (tas_data->state) {
733	case SJA1105_TAS_STATE_DISABLED:
734		/* Can't do anything at all if clock is still being stepped */
735		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
736			break;
737
738		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
739		if (rc < 0)
740			break;
741
742		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
743		if (rc < 0)
744			break;
745
746		/* Plan to start the earliest schedule first. The others
747		 * will be started in hardware, by way of their respective
748		 * entry points delta.
749		 * Try our best to avoid fringe cases (race condition between
750		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
751		 * least one second in the future from now. This is not ideal,
752		 * but this only needs to buy us time until the
753		 * sja1105_tas_start command below gets executed.
754		 */
755		base_time = future_base_time(tas_data->earliest_base_time,
756					     tas_data->max_cycle_time,
757					     now + 1ull * NSEC_PER_SEC);
758		base_time -= sja1105_delta_to_ns(1);
759
760		rc = sja1105_tas_set_base_time(priv, base_time);
761		if (rc < 0)
762			break;
763
764		tas_data->oper_base_time = base_time;
765
766		rc = sja1105_tas_start(priv);
767		if (rc < 0)
768			break;
769
770		base_time_ts = ns_to_timespec64(base_time);
771		now_ts = ns_to_timespec64(now);
772
773		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
774			base_time_ts.tv_sec, base_time_ts.tv_nsec,
775			now_ts.tv_sec, now_ts.tv_nsec);
776
777		break;
778
779	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
780		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
781			/* Clock was stepped.. bad news for TAS */
782			sja1105_tas_stop(priv);
783			break;
784		}
785
786		/* Check if TAS has actually started, by comparing the
787		 * scheduled start time with the SJA1105 PTP clock
788		 */
789		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
790		if (rc < 0)
791			break;
792
793		if (now < tas_data->oper_base_time) {
794			/* TAS has not started yet */
795			diff = ns_to_timespec64(tas_data->oper_base_time - now);
796			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
797				diff.tv_sec, diff.tv_nsec);
798			break;
799		}
800
801		/* Time elapsed, what happened? */
802		rc = sja1105_tas_check_running(priv);
803		if (rc < 0)
804			break;
805
806		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
807			/* TAS has started */
808			dev_err(ds->dev,
809				"TAS not started despite time elapsed\n");
810
811		break;
812
813	case SJA1105_TAS_STATE_RUNNING:
814		/* Clock was stepped.. bad news for TAS */
815		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
816			sja1105_tas_stop(priv);
817			break;
818		}
819
820		rc = sja1105_tas_check_running(priv);
821		if (rc < 0)
822			break;
823
824		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
825			dev_err(ds->dev, "TAS surprisingly stopped\n");
826
827		break;
828
829	default:
830		if (net_ratelimit())
831			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
832	}
833
834	if (rc && net_ratelimit())
835		dev_err(ds->dev, "An operation returned %d\n", rc);
836
837	mutex_unlock(&ptp_data->lock);
838}
839
840void sja1105_tas_clockstep(struct dsa_switch *ds)
841{
842	struct sja1105_private *priv = ds->priv;
843	struct sja1105_tas_data *tas_data = &priv->tas_data;
844
845	if (!tas_data->enabled)
846		return;
847
848	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
849	schedule_work(&tas_data->tas_work);
850}
851
852void sja1105_tas_adjfreq(struct dsa_switch *ds)
853{
854	struct sja1105_private *priv = ds->priv;
855	struct sja1105_tas_data *tas_data = &priv->tas_data;
856
857	if (!tas_data->enabled)
858		return;
859
860	/* No reason to schedule the workqueue, nothing changed */
861	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
862		return;
863
864	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
865	schedule_work(&tas_data->tas_work);
866}
867
868void sja1105_tas_setup(struct dsa_switch *ds)
869{
870	struct sja1105_private *priv = ds->priv;
871	struct sja1105_tas_data *tas_data = &priv->tas_data;
872
873	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
874	tas_data->state = SJA1105_TAS_STATE_DISABLED;
875	tas_data->last_op = SJA1105_PTP_NONE;
876
877	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
878}
879
880void sja1105_tas_teardown(struct dsa_switch *ds)
881{
882	struct sja1105_private *priv = ds->priv;
883	struct tc_taprio_qopt_offload *offload;
884	int port;
885
886	cancel_work_sync(&priv->tas_data.tas_work);
887
888	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
889		offload = priv->tas_data.offload[port];
890		if (!offload)
891			continue;
892
893		taprio_offload_free(offload);
894	}
895}