1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 | // SPDX-License-Identifier: MIT /* * Copyright © 2021-2023 Intel Corporation * Copyright (C) 2021-2002 Red Hat */ #include <drm/drm_managed.h> #include <drm/drm_mm.h> #include <drm/ttm/ttm_device.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_range_manager.h> #include <generated/xe_wa_oob.h> #include "regs/xe_gt_regs.h" #include "regs/xe_regs.h" #include "xe_bo.h" #include "xe_device.h" #include "xe_gt.h" #include "xe_mmio.h" #include "xe_res_cursor.h" #include "xe_sriov.h" #include "xe_ttm_stolen_mgr.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" struct xe_ttm_stolen_mgr { struct xe_ttm_vram_mgr base; /* PCI base offset */ resource_size_t io_base; /* GPU base offset */ resource_size_t stolen_base; void __iomem *mapping; }; 
static inline struct xe_ttm_stolen_mgr * to_stolen_mgr(struct ttm_resource_manager *man) { return container_of(man, struct xe_ttm_stolen_mgr, base.manager); } /** * xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access * stolen, can we then fallback to mapping through the GGTT. * @xe: xe device * * Some older integrated platforms don't support reliable CPU access for stolen, * however on such hardware we can always use the mappable part of the GGTT for * CPU access. Check if that's the case for this device. */ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) { return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_gt *mmio = xe_root_mmio_gt(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size; u64 tile_offset; u64 tile_size; tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start; tile_size = tile->mem.vram.actual_physical_size; /* Use DSM base address instead for stolen memory */ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset; if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; stolen_size = tile_size - mgr->stolen_base; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base; /* * There may be few KB of platform dependent reserved memory at the end * of vram which is not part of the DSM. Such reserved memory portion is * always less then DSM granularity so align down the stolen_size to DSM * granularity to accommodate such reserve vram portion. 
*/ return ALIGN_DOWN(stolen_size, SZ_1M); } static u32 get_wopcm_size(struct xe_device *xe) { u32 wopcm_size; u64 val; val = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED); val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); switch (val) { case 0x5 ... 0x6: val--; fallthrough; case 0x0 ... 0x3: wopcm_size = (1U << val) * SZ_1M; break; default: WARN(1, "Missing case wopcm_size=%llx\n", val); wopcm_size = 0; } return wopcm_size; } static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt; u32 stolen_size, wopcm_size; u32 ggc, gms; ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC); /* * Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the * GTT size */ if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK)) return 0; /* * Graphics >= 1270 uses the offset to the GSMBASE as address in the * PTEs, together with the DM flag being set. Previously there was no * such flag so the address was the io_base. * * DSMBASE = GSMBASE + 8MB */ mgr->stolen_base = SZ_8M; mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base; /* return valid GMS value, -EIO if invalid */ gms = REG_FIELD_GET(GMS_MASK, ggc); switch (gms) { case 0x0 ... 0x04: stolen_size = gms * 32 * SZ_1M; break; case 0xf0 ... 0xfe: stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M; break; default: return 0; } /* Carve out the top of DSM as it contains the reserved WOPCM region */ wopcm_size = get_wopcm_size(xe); if (drm_WARN_ON(&xe->drm, !wopcm_size)) return 0; stolen_size -= wopcm_size; if (media_gt && XE_WA(media_gt, 14019821291)) { u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE) & ~GENMASK_ULL(5, 0); /* * This workaround is primarily implemented by the BIOS. 
We * just need to figure out whether the BIOS has applied the * workaround (meaning the programmed address falls within * the DSM) and, if so, reserve that part of the DSM to * prevent accidental reuse. The DSM location should be just * below the WOPCM. */ if (gscpsmi_base >= mgr->io_base && gscpsmi_base < mgr->io_base + stolen_size) { xe_gt_dbg(media_gt, "Reserving %llu bytes of DSM for Wa_14019821291\n", mgr->io_base + stolen_size - gscpsmi_base); stolen_size = gscpsmi_base - mgr->io_base; } } if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2))) return 0; return stolen_size; } extern struct resource intel_graphics_stolen_res; static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { #ifdef CONFIG_X86 /* Map into GGTT */ mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2); /* Stolen memory is x86 only */ mgr->stolen_base = intel_graphics_stolen_res.start; return resource_size(&intel_graphics_stolen_res); #else return 0; #endif } void xe_ttm_stolen_mgr_init(struct xe_device *xe) { struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); u64 stolen_size, io_size, pgsize; int err; if (IS_SRIOV_VF(xe)) stolen_size = 0; else if (IS_DGFX(xe)) stolen_size = detect_bar2_dgfx(xe, mgr); else if (GRAPHICS_VERx100(xe) >= 1270) stolen_size = detect_bar2_integrated(xe, mgr); else stolen_size = detect_stolen(xe, mgr); if (!stolen_size) { drm_dbg_kms(&xe->drm, "No stolen memory support\n"); return; } pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K; if (pgsize < PAGE_SIZE) pgsize = PAGE_SIZE; /* * We don't try to attempt partial visible support for stolen vram, * since stolen is always at the end of vram, and the BAR size is pretty * much always 256M, with small-bar. 
*/ io_size = 0; if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe)) io_size = stolen_size; err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size, io_size, pgsize); if (err) { drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err); return; } drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n", stolen_size); if (io_size) mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size); } u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset) { struct xe_device *xe = xe_bo_device(bo); struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_mgr); struct xe_res_cursor cur; XE_WARN_ON(!mgr->io_base); if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return mgr->io_base + xe_bo_ggtt_addr(bo) + offset; xe_res_first(bo->ttm.resource, offset, 4096, &cur); return mgr->io_base + cur.start; } static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr, struct ttm_resource *mem) { struct xe_res_cursor cur; if (!mgr->io_base) return -EIO; xe_res_first(mem, 0, 4096, &cur); mem->bus.offset = cur.start; drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)); if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping) mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset; mem->bus.offset += mgr->io_base; mem->bus.is_iomem = true; mem->bus.caching = ttm_write_combined; return 0; } static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr, struct ttm_resource *mem) { #ifdef CONFIG_X86 struct xe_bo *bo = ttm_to_xe_bo(mem->bo); XE_WARN_ON(IS_DGFX(xe)); /* XXX: Require BO to be mapped to GGTT? 
*/ if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_CREATE_GGTT_BIT))) return -EIO; /* GGTT is always contiguously mapped */ mem->bus.offset = xe_bo_ggtt_addr(bo) + mgr->io_base; mem->bus.is_iomem = true; mem->bus.caching = ttm_write_combined; return 0; #else /* How is it even possible to get here without gen12 stolen? */ drm_WARN_ON(&xe->drm, 1); return -EIO; #endif } int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem) { struct ttm_resource_manager *ttm_mgr = ttm_manager_type(&xe->ttm, XE_PL_STOLEN); struct xe_ttm_stolen_mgr *mgr = ttm_mgr ? to_stolen_mgr(ttm_mgr) : NULL; if (!mgr || !mgr->io_base) return -EIO; if (xe_ttm_stolen_cpu_access_needs_ggtt(xe)) return __xe_ttm_stolen_io_mem_reserve_stolen(xe, mgr, mem); else return __xe_ttm_stolen_io_mem_reserve_bar2(xe, mgr, mem); } u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe) { struct xe_ttm_stolen_mgr *mgr = to_stolen_mgr(ttm_manager_type(&xe->ttm, XE_PL_STOLEN)); return mgr->stolen_base; } |