Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.4.
  1// SPDX-License-Identifier: MIT
  2/*
  3 * Copyright © 2022 Intel Corporation
  4 */
  5
  6#include "xe_gt_topology.h"
  7
  8#include <generated/xe_wa_oob.h>
  9#include <linux/bitmap.h>
 10#include <linux/compiler.h>
 11
 12#include "regs/xe_gt_regs.h"
 13#include "xe_assert.h"
 14#include "xe_gt.h"
 15#include "xe_mmio.h"
 16#include "xe_wa.h"
 17
 18static void
 19load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
 20{
 21	va_list argp;
 22	u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
 23	int i;
 24
 25	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
 26		numregs = XE_MAX_DSS_FUSE_REGS;
 27
 28	va_start(argp, numregs);
 29	for (i = 0; i < numregs; i++)
 30		fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
 31	va_end(argp);
 32
 33	bitmap_from_arr32(mask, fuse_val, numregs * 32);
 34}
 35
 36static void
 37load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
 38{
 39	struct xe_device *xe = gt_to_xe(gt);
 40	u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
 41	u32 val = 0;
 42	int i;
 43
 44	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);
 45
 46	/*
 47	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
 48	 * of enable).
 49	 */
 50	if (GRAPHICS_VERx100(xe) < 1250)
 51		reg_val = ~reg_val & XELP_EU_MASK;
 52
 53	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
 54		/* SIMD16 EUs, one bit == one EU */
 55		*eu_type = XE_GT_EU_TYPE_SIMD16;
 56		val = reg_val;
 57	} else {
 58		/* SIMD8 EUs, one bit == 2 EU */
 59		*eu_type = XE_GT_EU_TYPE_SIMD8;
 60		for (i = 0; i < fls(reg_val); i++)
 61			if (reg_val & BIT(i))
 62				val |= 0x3 << 2 * i;
 63	}
 64
 65	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
 66}
 67
 68/**
 69 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
 70 *
 71 * It is used to compute the L3 bank masks in a generic format on
 72 * various platforms where the internal representation of L3 node
 73 * and masks from registers are different.
 74 *
 75 * @xe: device
 76 * @dst: destination
 77 * @pattern: pattern to replicate
 78 * @patternbits: size of the pattern, in bits
 79 * @mask: mask describing where to replicate the pattern
 80 *
 81 * Example 1:
 82 * ----------
 83 * @pattern =    0b1111
 84 *                 └┬─┘
 85 * @patternbits =   4 (bits)
 86 * @mask = 0b0101
 87 *           ││││
 88 *           │││└────────────────── 0b1111 (=1×0b1111)
 89 *           ││└──────────── 0b0000    │   (=0×0b1111)
 90 *           │└────── 0b1111    │      │   (=1×0b1111)
 91 *           └ 0b0000    │      │      │   (=0×0b1111)
 92 *                │      │      │      │
 93 * @dst =      0b0000 0b1111 0b0000 0b1111
 94 *
 95 * Example 2:
 96 * ----------
 97 * @pattern =    0b11111111
 98 *                 └┬─────┘
 99 * @patternbits =   8 (bits)
100 * @mask = 0b10
101 *           ││
102 *           ││
103 *           ││
104 *           │└────────── 0b00000000 (=0×0b11111111)
105 *           └ 0b11111111      │     (=1×0b11111111)
106 *                  │          │
107 * @dst =      0b11111111 0b00000000
108 */
109static void
110gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
111			 xe_l3_bank_mask_t pattern, int patternbits,
112			 unsigned long mask)
113{
114	unsigned long bit;
115
116	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
117		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
118	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);
119	for_each_set_bit(bit, &mask, 32) {
120		xe_l3_bank_mask_t shifted_pattern = {};
121
122		bitmap_shift_left(shifted_pattern, pattern, bit * patternbits,
123				  XE_MAX_L3_BANK_MASK_BITS);
124		bitmap_or(dst, dst, shifted_pattern, XE_MAX_L3_BANK_MASK_BITS);
125	}
126}
127
128static void
129load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
130{
131	struct xe_device *xe = gt_to_xe(gt);
132	u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
133
134	/*
135	 * PTL platforms with media version 30.00 do not provide proper values
136	 * for the media GT's L3 bank registers.  Skip the readout since we
137	 * don't have any way to obtain real values.
138	 *
139	 * This may get re-described as an official workaround in the future,
140	 * but there's no tracking number assigned yet so we use a custom
141	 * OOB workaround descriptor.
142	 */
143	if (XE_WA(gt, no_media_l3))
144		return;
145
146	if (GRAPHICS_VER(xe) >= 20) {
147		xe_l3_bank_mask_t per_node = {};
148		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
149		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);
150
151		bitmap_from_arr32(per_node, &bank_val, 32);
152		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
153					 meml3_en);
154	} else if (GRAPHICS_VERx100(xe) >= 1270) {
155		xe_l3_bank_mask_t per_node = {};
156		xe_l3_bank_mask_t per_mask_bit = {};
157		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
158		u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
159		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);
160
161		bitmap_set_value8(per_mask_bit, 0x3, 0);
162		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
163		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
164					 meml3_en);
165	} else if (xe->info.platform == XE_PVC) {
166		xe_l3_bank_mask_t per_node = {};
167		xe_l3_bank_mask_t per_mask_bit = {};
168		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
169		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);
170
171		bitmap_set_value8(per_mask_bit, 0xf, 0);
172		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
173					 bank_val);
174		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
175					 meml3_en);
176	} else if (xe->info.platform == XE_DG2) {
177		xe_l3_bank_mask_t per_node = {};
178		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
179
180		bitmap_set_value8(per_node, 0xff, 0);
181		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
182	} else {
183		/* 1:1 register bit to mask bit (inverted register bits) */
184		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);
185
186		bitmap_from_arr32(l3_bank_mask, &mask, 32);
187	}
188}
189
/*
 * Report how many geometry and compute DSS fuse registers the platform
 * provides; callers use the counts to decide how many variadic registers
 * load_dss_mask() should consume.
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	int geometry, compute;

	if (GRAPHICS_VER(xe) > 20) {
		/* Xe3+: three fuse registers for each DSS type. */
		geometry = 3;
		compute = 3;
	} else if (GRAPHICS_VERx100(xe) == 1260) {
		/* Xe_HPC: compute-only, two fuse registers. */
		geometry = 0;
		compute = 2;
	} else if (GRAPHICS_VERx100(xe) >= 1250) {
		/* Xe_HP / Xe_HPG: one register of each type. */
		geometry = 1;
		compute = 1;
	} else {
		/* Pre-Xe_HP: geometry only. */
		geometry = 1;
		compute = 0;
	}

	*geometry_regs = geometry;
	*compute_regs = compute;
}
207
/**
 * xe_gt_topology_init - Read fuse registers and populate the GT topology
 * @gt: the GT to initialize
 *
 * Fills in gt->fuse_topo (geometry/compute DSS masks, per-DSS EU mask and
 * EU type, L3 bank mask) from the hardware fuse registers, then dumps the
 * resulting topology to the driver debug log.
 */
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	/* Unused trailing register arguments are ignored by load_dss_mask(). */
	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}
240
241static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
242{
243	switch (eu_type) {
244	case XE_GT_EU_TYPE_SIMD16:
245		return "simd16";
246	case XE_GT_EU_TYPE_SIMD8:
247		return "simd8";
248	}
249
250	return NULL;
251}
252
/**
 * xe_gt_topology_dump - Print the GT's fuse topology
 * @gt: GT whose topology to print
 * @p: printer to emit the output through
 *
 * Prints the geometry/compute DSS masks, the per-DSS EU mask with its EU
 * type, and the L3 bank mask in bitmap (%*pb) format.
 */
void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);
	drm_printf(p, "EU type:             %s\n",
		   eu_type_to_str(gt->fuse_topo.eu_type));

	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
		   gt->fuse_topo.l3_bank_mask);
}
269
270/*
271 * Used to obtain the index of the first DSS.  Can start searching from the
272 * beginning of a specific dss group (e.g., gslice, cslice, etc.) if
273 * groupsize and groupnum are non-zero.
274 */
275unsigned int
276xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
277{
278	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
279}
280
/* Return true if no DSS bits are set in @mask. */
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}
285
286/**
287 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
288 * @gt: GT to check
289 * @quad: Which quadrant of the DSS space to check
290 *
291 * Since Xe_HP platforms can have up to four CCS engines, those engines
292 * are each logically associated with a quarter of the possible DSS.  If there
293 * are no DSS present in one of the four quadrants of the DSS space, the
294 * corresponding CCS engine is also not available for use.
295 *
296 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
297 */
298bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
299{
300	struct xe_device *xe = gt_to_xe(gt);
301	xe_dss_mask_t all_dss;
302	int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
303
304	bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
305		  XE_MAX_DSS_FUSE_BITS);
306
307	get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
308	dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
309
310	quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
311
312	return quad_first < (quad + 1) * dss_per_quad;
313}
314
/* Return true if DSS index @dss is enabled for geometry on @gt. */
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.g_dss_mask);
}
319
/* Return true if DSS index @dss is enabled for compute on @gt. */
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.c_dss_mask);
}