// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	if (sseu->has_xehp_dss)
		return bitmap_weight(sseu->subslice_mask.xehp,
				     XEHP_BITMAP_BITS(sseu->subslice_mask));

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask.hsw); i++)
		total += hweight8(sseu->subslice_mask.hsw[i]);

	return total;
}

unsigned int
intel_sseu_get_hsw_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	WARN_ON(sseu->has_xehp_dss);
	if (WARN_ON(slice >= sseu->max_slices))
		return 0;

	return sseu->subslice_mask.hsw[slice];
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	if (sseu->has_xehp_dss) {
		WARN_ON(slice > 0);
		return sseu->eu_mask.xehp[subslice];
	} else {
		return sseu->eu_mask.hsw[slice][subslice];
	}
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	GEM_WARN_ON(eu_mask && __fls(eu_mask) >= sseu->max_eus_per_subslice);
	if (sseu->has_xehp_dss) {
		GEM_WARN_ON(slice > 0);
		sseu->eu_mask.xehp[subslice] = eu_mask;
	} else {
		sseu->eu_mask.hsw[slice][subslice] = eu_mask;
	}
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	int s, ss, total = 0;

	for (s = 0; s < sseu->max_slices; s++)
		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (sseu->has_xehp_dss)
				total += hweight16(sseu->eu_mask.xehp[ss]);
			else
				total += hweight16(sseu->eu_mask.hsw[s][ss]);

	return total;
}

/**
 * intel_sseu_copy_eumask_to_user - Copy EU mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing EU mask to copy
 *
 * Copies the EU mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_eumask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE] = {};
	int eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	int len = sseu->max_slices * sseu->max_subslices * eu_stride;
	int s, ss, i;

	for (s = 0; s < sseu->max_slices; s++) {
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int uapi_offset =
				s * sseu->max_subslices * eu_stride +
				ss * eu_stride;
			u16 mask = sseu_get_eus(sseu, s, ss);

			for (i = 0; i < eu_stride; i++)
				eu_mask[uapi_offset + i] =
					(mask >> (BITS_PER_BYTE * i)) & 0xff;
		}
	}

	return copy_to_user(to, eu_mask, len);
}

/**
 * intel_sseu_copy_ssmask_to_user - Copy subslice mask into a userspace buffer
 * @to: Pointer to userspace buffer to copy to
 * @sseu: SSEU structure containing subslice mask to copy
 *
 * Copies the subslice mask to a userspace buffer in the format expected by
 * the query ioctl's topology queries.
 *
 * Returns the result of the copy_to_user() operation.
 */
int intel_sseu_copy_ssmask_to_user(void __user *to,
				   const struct sseu_dev_info *sseu)
{
	u8 ss_mask[GEN_SS_MASK_SIZE] = {};
	int ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	int len = sseu->max_slices * ss_stride;
	int s, ss, i;

	for (s = 0; s < sseu->max_slices; s++) {
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			i = s * ss_stride * BITS_PER_BYTE + ss;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				continue;

			ss_mask[i / BITS_PER_BYTE] |= BIT(i % BITS_PER_BYTE);
		}
	}

	return copy_to_user(to, ss_mask, len);
}
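
/*
 * Illustrative userspace sketch (not part of this file): one way the
 * buffers produced by intel_sseu_copy_eumask_to_user() and
 * intel_sseu_copy_ssmask_to_user() can be consumed through the
 * DRM_I915_QUERY_TOPOLOGY_INFO query.  The device path and helper names
 * are assumptions for the example; the blob layout itself is documented
 * in include/uapi/drm/i915_drm.h.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int print_topology(int fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
	};
	struct drm_i915_query q = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};
	struct drm_i915_query_topology_info *info;
	int s, ss, eu;

	/* First pass with length == 0 asks the kernel how big the blob is. */
	if (ioctl(fd, DRM_IOCTL_I915_QUERY, &q) || item.length <= 0)
		return -1;

	info = calloc(1, item.length);
	item.data_ptr = (uintptr_t)info;
	if (ioctl(fd, DRM_IOCTL_I915_QUERY, &q)) {
		free(info);
		return -1;
	}

	for (s = 0; s < info->max_slices; s++) {
		for (ss = 0; ss < info->max_subslices; ss++) {
			/* Subslice bits: subslice_stride bytes per slice. */
			if (!(info->data[info->subslice_offset +
					 s * info->subslice_stride + ss / 8] &
			      (1u << (ss % 8))))
				continue;

			/* EU bits: eu_stride bytes per (slice, subslice). */
			for (eu = 0; eu < info->max_eus_per_subslice; eu++)
				if (info->data[info->eu_offset +
					       (s * info->max_subslices + ss) *
					       info->eu_stride + eu / 8] &
				    (1u << (eu % 8)))
					printf("slice%d ss%d eu%d\n", s, ss, eu);
		}
	}

	free(info);
	return 0;
}

int main(void)
{
	/* "/dev/dri/renderD128" is just an example render node. */
	int fd = open("/dev/dri/renderD128", O_RDWR);

	return fd < 0 ? 1 : print_topology(fd);
}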

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
				    u32 ss_en, u16 eu_en)
{
	u32 valid_ss_mask = GENMASK(sseu->max_subslices - 1, 0);
	int ss;

	sseu->slice_mask |= BIT(0);
	sseu->subslice_mask.hsw[0] = ss_en & valid_ss_mask;

	for (ss = 0; ss < sseu->max_subslices; ss++)
		if (intel_sseu_has_subslice(sseu, 0, ss))
			sseu_set_eus(sseu, 0, ss, eu_en);

	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void xehp_compute_sseu_info(struct sseu_dev_info *sseu,
				   u16 eu_en)
{
	int ss;

	sseu->slice_mask |= BIT(0);

	bitmap_or(sseu->subslice_mask.xehp,
		  sseu->compute_subslice_mask.xehp,
		  sseu->geometry_subslice_mask.xehp,
		  XEHP_BITMAP_BITS(sseu->subslice_mask));

	for (ss = 0; ss < sseu->max_subslices; ss++)
		if (intel_sseu_has_subslice(sseu, 0, ss))
			sseu_set_eus(sseu, 0, ss, eu_en);

	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void
xehp_load_dss_mask(struct intel_uncore *uncore,
		   intel_sseu_ss_mask_t *ssmask,
		   int numregs,
		   ...)
{
	va_list argp;
	u32 fuse_val[I915_MAX_SS_FUSE_REGS] = {};
	int i;

	if (WARN_ON(numregs > I915_MAX_SS_FUSE_REGS))
		numregs = I915_MAX_SS_FUSE_REGS;

	va_start(argp, numregs);
	for (i = 0; i < numregs; i++)
		fuse_val[i] = intel_uncore_read(uncore, va_arg(argp, i915_reg_t));
	va_end(argp);

	bitmap_from_arr32(ssmask->xehp, fuse_val, numregs * 32);
}

static void xehp_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	int num_compute_regs, num_geometry_regs;
	int eu;

	if (IS_PONTEVECCHIO(gt->i915)) {
		num_geometry_regs = 0;
		num_compute_regs = 2;
	} else {
		num_geometry_regs = 1;
		num_compute_regs = 1;
	}

	/*
	 * The concept of slice has been removed in Xe_HP.  To be compatible
	 * with prior generations, assume a single slice across the entire
	 * device. Then calculate out the DSS for each workload type within
	 * that software slice.
	 */
	intel_sseu_set_info(sseu, 1,
			    32 * max(num_geometry_regs, num_compute_regs),
			    HAS_ONE_EU_PER_FUSE_BIT(gt->i915) ? 8 : 16);
	sseu->has_xehp_dss = 1;

	xehp_load_dss_mask(uncore, &sseu->geometry_subslice_mask,
			   num_geometry_regs,
			   GEN12_GT_GEOMETRY_DSS_ENABLE);
	xehp_load_dss_mask(uncore, &sseu->compute_subslice_mask,
			   num_compute_regs,
			   GEN12_GT_COMPUTE_DSS_ENABLE,
			   XEHPC_GT_COMPUTE_DSS_ENABLE_EXT);

	eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;

	if (HAS_ONE_EU_PER_FUSE_BIT(gt->i915))
		eu_en = eu_en_fuse;
	else
		for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
			if (eu_en_fuse & BIT(eu))
				eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	xehp_compute_sseu_info(sseu, eu_en);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 */
	intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * Although gen12 architecture supported multiple slices, TGL, RKL,
	 * DG1, and ADL only had a single slice.
	 */
	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);

	/* one bit per pair of EUs */
	eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, g_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}
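
/*
 * Worked illustration (hypothetical helper, not part of this file): on
 * gen12, and on Xe_HP parts without HAS_ONE_EU_PER_FUSE_BIT, each fuse
 * bit above stands for a pair of EUs, so an 8-bit fuse field is widened
 * into a 16-bit EU mask.  For example a fuse value of 0x07 (pairs 0-2
 * present) expands to 0x3f (EUs 0-5 present).
 */
static u16 expand_eu_pair_fuse(u8 eu_en_fuse, unsigned int max_eus_per_subslice)
{
	u16 eu_en = 0;
	unsigned int eu;

	for (eu = 0; eu < max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	return eu_en;	/* expand_eu_pair_fuse(0x07, 16) == 0x003f */
}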

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	/*
	 * Although gen11 architecture supported multiple slices, ICL and
	 * EHL/JSL only had a single slice in practice.
	 */
	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;
	drm_WARN_ON(&gt->i915->drm, s_en != 0x1);

	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, ss_en, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		sseu->subslice_mask.hsw[0] |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask & 0xFF);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		sseu->subslice_mask.hsw[0] |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask & 0xFF);
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EUs
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
		intel_sseu_subslice_total(sseu) :
		0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		sseu->subslice_mask.hsw[s] = subslice_mask;

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & eu_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask.hsw[0] & BIT(ss)))
		RUNTIME_INFO(i915)->has_pooled_eu = hweight8(sseu->subslice_mask.hsw[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (HAS_POOLED_EU(i915)) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		sseu->subslice_mask.hsw[s] = subslice_mask;

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask & 0xFF);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices
	 * there are; we work it out from the PCI IDs here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		sseu->subslice_mask.hsw[s] = subslice_mask;

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
		xehp_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 9)
		gen9_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (gt->perf.exclusive_stream)
		req_sseu = &gt->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask.hsw[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}
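
/*
 * Worked example of the ICL SScount restriction handled above
 * (illustration only): on a 1x8x8 part,
 *
 *   request 1 slice x 4 subslices -> S_CNT = 1, SS_CNT = 4
 *   request 1 slice x 8 subslices -> S_CNT = 2, subslice power gating is
 *                                    dropped and SS_CNT is not programmed,
 *                                    since hardware ignores the subslice
 *                                    count once more than one slice is on
 *
 * i.e. the full request is expressed as 2x4x8, which the hardware
 * translates back into the 1x8x8 configuration.
 */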

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	if (sseu->has_xehp_dss) {
		drm_printf(p, "subslice total: %u\n",
			   intel_sseu_subslice_total(sseu));
		drm_printf(p, "geometry dss mask=%*pb\n",
			   XEHP_BITMAP_BITS(sseu->geometry_subslice_mask),
			   sseu->geometry_subslice_mask.xehp);
		drm_printf(p, "compute dss mask=%*pb\n",
			   XEHP_BITMAP_BITS(sseu->compute_subslice_mask),
			   sseu->compute_subslice_mask.xehp);
	} else {
		drm_printf(p, "slice total: %u, mask=%04x\n",
			   hweight8(sseu->slice_mask), sseu->slice_mask);
		drm_printf(p, "subslice total: %u\n",
			   intel_sseu_subslice_total(sseu));

		for (s = 0; s < sseu->max_slices; s++) {
			u8 ss_mask = sseu->subslice_mask.hsw[s];

			drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
				   s, hweight8(ss_mask), ss_mask);
		}
	}

	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		u8 ss_mask = sseu->subslice_mask.hsw[s];

		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, hweight8(ss_mask), ss_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(test_bit(dss, sseu->geometry_subslice_mask.xehp)),
			   str_yes_no(test_bit(dss, sseu->compute_subslice_mask.xehp)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		sseu_print_xehp_topology(sseu, p);
	} else {
		sseu_print_hsw_topology(sseu, p);
	}
}

void intel_sseu_print_ss_info(const char *type,
			      const struct sseu_dev_info *sseu,
			      struct seq_file *m)
{
	int s;

	if (sseu->has_xehp_dss) {
		seq_printf(m, "  %s Geometry DSS: %u\n", type,
			   bitmap_weight(sseu->geometry_subslice_mask.xehp,
					 XEHP_BITMAP_BITS(sseu->geometry_subslice_mask)));
		seq_printf(m, "  %s Compute DSS: %u\n", type,
			   bitmap_weight(sseu->compute_subslice_mask.xehp,
					 XEHP_BITMAP_BITS(sseu->compute_subslice_mask)));
	} else {
		for (s = 0; s < fls(sseu->slice_mask); s++)
			seq_printf(m, "  %s Slice%i subslices: %u\n", type,
				   s, hweight8(sseu->subslice_mask.hsw[s]));
	}
}

u16 intel_slicemask_from_xehp_dssmask(intel_sseu_ss_mask_t dss_mask,
				      int dss_per_slice)
{
	intel_sseu_ss_mask_t per_slice_mask = {};
	unsigned long slice_mask = 0;
	int i;

	WARN_ON(DIV_ROUND_UP(XEHP_BITMAP_BITS(dss_mask), dss_per_slice) >
		8 * sizeof(slice_mask));

	bitmap_fill(per_slice_mask.xehp, dss_per_slice);
	for (i = 0; !bitmap_empty(dss_mask.xehp, XEHP_BITMAP_BITS(dss_mask)); i++) {
		if (bitmap_intersects(dss_mask.xehp, per_slice_mask.xehp, dss_per_slice))
			slice_mask |= BIT(i);

		bitmap_shift_right(dss_mask.xehp, dss_mask.xehp, dss_per_slice,
				   XEHP_BITMAP_BITS(dss_mask));
	}

	return slice_mask;
}
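
/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * same DSS -> slice mapping as intel_slicemask_from_xehp_dssmask() above,
 * simplified for DSS masks that fit in a single unsigned long instead of
 * a multi-word bitmap.  With dss_per_slice = 4, a DSS mask of 0x71
 * (DSS 0, 4, 5 and 6 present) touches slices 0 and 1, giving 0x3.
 */
static u16 slicemask_from_dssmask_example(unsigned long dss_mask,
					  int dss_per_slice)
{
	unsigned long per_slice_mask = GENMASK(dss_per_slice - 1, 0);
	u16 slice_mask = 0;
	int i;

	for (i = 0; dss_mask; i++) {
		if (dss_mask & per_slice_mask)
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;	/* slicemask_from_dssmask_example(0x71, 4) == 0x3 */
}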