// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_device.h"

#include <linux/units.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_ioctl.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/xe_drm.h>

#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_debugfs.h"
#include "xe_display.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
#include "xe_drv.h"
#include "xe_exec_queue.h"
#include "xe_exec.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_heci_gsc.h"	/* xe_heci_gsc_init()/xe_heci_gsc_fini() are called below */
#include "xe_irq.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_query.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
#include "xe_wait_user_fence.h"
#include "xe_hwmon.h"

#ifdef CONFIG_LOCKDEP
struct lockdep_map xe_device_mem_access_lockdep_map = {
	.name = "xe_device_mem_access_lockdep_map"
};
#endif

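/*
 * Per-client open: allocate the struct xe_file backing this DRM file, along
 * with its fdinfo client record and the xarrays used to track the VMs and
 * exec queues created through this file.
 */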
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_drm_client *client;
	struct xe_file *xef;
	int ret = -ENOMEM;

	xef = kzalloc(sizeof(*xef), GFP_KERNEL);
	if (!xef)
		return ret;

	client = xe_drm_client_alloc();
	if (!client) {
		kfree(xef);
		return ret;
	}

	xef->drm = file;
	xef->client = client;
	xef->xe = xe;

	mutex_init(&xef->vm.lock);
	xa_init_flags(&xef->vm.xa, XA_FLAGS_ALLOC1);

	mutex_init(&xef->exec_queue.lock);
	xa_init_flags(&xef->exec_queue.xa, XA_FLAGS_ALLOC1);

	spin_lock(&xe->clients.lock);
	xe->clients.count++;
	spin_unlock(&xe->clients.lock);

	file->driver_priv = xef;
	return 0;
}

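/*
 * Per-client close: kill and drop every exec queue still registered in this
 * file's xarray, close and put its VMs, then release the client accounting
 * and free the struct xe_file.
 */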
static void xe_file_close(struct drm_device *dev, struct drm_file *file)
{
	struct xe_device *xe = to_xe_device(dev);
	struct xe_file *xef = file->driver_priv;
	struct xe_vm *vm;
	struct xe_exec_queue *q;
	unsigned long idx;

	mutex_lock(&xef->exec_queue.lock);
	xa_for_each(&xef->exec_queue.xa, idx, q) {
		xe_exec_queue_kill(q);
		xe_exec_queue_put(q);
	}
	mutex_unlock(&xef->exec_queue.lock);
	xa_destroy(&xef->exec_queue.xa);
	mutex_destroy(&xef->exec_queue.lock);
	mutex_lock(&xef->vm.lock);
	xa_for_each(&xef->vm.xa, idx, vm)
		xe_vm_close_and_put(vm);
	mutex_unlock(&xef->vm.lock);
	xa_destroy(&xef->vm.xa);
	mutex_destroy(&xef->vm.lock);

	spin_lock(&xe->clients.lock);
	xe->clients.count--;
	spin_unlock(&xe->clients.lock);

	xe_drm_client_put(xef->client);
	kfree(xef);
}

static const struct drm_ioctl_desc xe_ioctls[] = {
	DRM_IOCTL_DEF_DRV(XE_DEVICE_QUERY, xe_query_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_CREATE, xe_gem_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_GEM_MMAP_OFFSET, xe_gem_mmap_offset_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_CREATE, xe_vm_create_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_DESTROY, xe_vm_destroy_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_VM_BIND, xe_vm_bind_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_CREATE, xe_exec_queue_create_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY, xe_exec_queue_destroy_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_GET_PROPERTY, xe_exec_queue_get_property_ioctl,
			  DRM_RENDER_ALLOW),
	DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
			  DRM_RENDER_ALLOW),
};

static const struct file_operations xe_driver_fops = {
	.owner = THIS_MODULE,
	.open = drm_open,
	.release = drm_release_noglobal,
	.unlocked_ioctl = drm_ioctl,
	.mmap = drm_gem_mmap,
	.poll = drm_poll,
	.read = drm_read,
	.compat_ioctl = drm_compat_ioctl,
	.llseek = noop_llseek,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = drm_show_fdinfo,
#endif
};

static void xe_driver_release(struct drm_device *dev)
{
	struct xe_device *xe = to_xe_device(dev);

	pci_set_drvdata(to_pci_dev(xe->drm.dev), NULL);
}

static struct drm_driver driver = {
	/* Don't use MTRRs here; the Xserver or userspace app should
	 * deal with them for Intel hardware.
	 */
	.driver_features =
	    DRIVER_GEM |
	    DRIVER_RENDER | DRIVER_SYNCOBJ |
	    DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
	.open = xe_file_open,
	.postclose = xe_file_close,

	.gem_prime_import = xe_gem_prime_import,

	.dumb_create = xe_bo_dumb_create,
	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
#ifdef CONFIG_PROC_FS
	.show_fdinfo = xe_drm_client_fdinfo,
#endif
	.release = &xe_driver_release,

	.ioctls = xe_ioctls,
	.num_ioctls = ARRAY_SIZE(xe_ioctls),
	.fops = &xe_driver_fops,
	.name = DRIVER_NAME,
	.desc = DRIVER_DESC,
	.date = DRIVER_DATE,
	.major = DRIVER_MAJOR,
	.minor = DRIVER_MINOR,
	.patchlevel = DRIVER_PATCHLEVEL,
};

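/*
 * drmm cleanup action: tear down the workqueues and the TTM device that
 * xe_device_create() set up. Runs when the underlying drm_device is released.
 */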
static void xe_device_destroy(struct drm_device *dev, void *dummy)
{
	struct xe_device *xe = to_xe_device(dev);

	if (xe->ordered_wq)
		destroy_workqueue(xe->ordered_wq);

	if (xe->unordered_wq)
		destroy_workqueue(xe->unordered_wq);

	ttm_device_fini(&xe->ttm);
}

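/*
 * Allocate and minimally initialize an xe_device for a freshly probed PCI
 * device: hook up the display callbacks, remove conflicting firmware
 * framebuffers, allocate the drm_device, and set up TTM, locks, the ASID
 * xarray and the driver workqueues. The TTM device and workqueues are torn
 * down again by the xe_device_destroy() drmm action.
 */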
struct xe_device *xe_device_create(struct pci_dev *pdev,
				   const struct pci_device_id *ent)
{
	struct xe_device *xe;
	int err;

	xe_display_driver_set_hooks(&driver);

	err = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &driver);
	if (err)
		return ERR_PTR(err);

	xe = devm_drm_dev_alloc(&pdev->dev, &driver, struct xe_device, drm);
	if (IS_ERR(xe))
		return xe;

	err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
			      xe->drm.anon_inode->i_mapping,
			      xe->drm.vma_offset_manager, false, false);
	if (WARN_ON(err))
		goto err;

	err = drmm_add_action_or_reset(&xe->drm, xe_device_destroy, NULL);
	if (err)
		goto err;

	xe->info.devid = pdev->device;
	xe->info.revid = pdev->revision;
	xe->info.force_execlist = xe_modparam.force_execlist;

	spin_lock_init(&xe->irq.lock);
	spin_lock_init(&xe->clients.lock);

	init_waitqueue_head(&xe->ufence_wq);

	drmm_mutex_init(&xe->drm, &xe->usm.lock);
	xa_init_flags(&xe->usm.asid_to_vm, XA_FLAGS_ALLOC);

	if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
		/* Trigger a large asid and an early asid wrap. */
		u32 asid;

		BUILD_BUG_ON(XE_MAX_ASID < 2);
		err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, NULL,
				      XA_LIMIT(XE_MAX_ASID - 2, XE_MAX_ASID - 1),
				      &xe->usm.next_asid, GFP_KERNEL);
		drm_WARN_ON(&xe->drm, err);
		if (err >= 0)
			xa_erase(&xe->usm.asid_to_vm, asid);
	}

	spin_lock_init(&xe->pinned.lock);
	INIT_LIST_HEAD(&xe->pinned.kernel_bo_present);
	INIT_LIST_HEAD(&xe->pinned.external_vram);
	INIT_LIST_HEAD(&xe->pinned.evicted);

	xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
	xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
	if (!xe->ordered_wq || !xe->unordered_wq) {
		drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
		err = -ENOMEM;
		goto err;
	}

	err = xe_display_create(xe);
	if (WARN_ON(err))
		goto err;

	return xe;

err:
	return ERR_PTR(err);
}

/*
 * The driver-initiated FLR is the highest level of reset that we can trigger
 * from within the driver. It is different from the PCI FLR in that it doesn't
 * fully reset the SGUnit and doesn't modify the PCI config space and therefore
 * it doesn't require a re-enumeration of the PCI BARs. However, the
 * driver-initiated FLR does still cause a reset of both GT and display and a
 * memory wipe of local and stolen memory, so recovery would require a full HW
 * re-init and saving/restoring (or re-populating) the wiped memory. Since we
 * perform the FLR as the very last action before releasing access to the HW
 * during the driver release flow, we don't attempt recovery at all, because
 * if/when a new instance of the driver is bound to the device it will do a
 * full re-init anyway.
 */
static void xe_driver_flr(struct xe_device *xe)
{
	const unsigned int flr_timeout = 3 * MICRO; /* specs recommend a 3s wait */
	struct xe_gt *gt = xe_root_mmio_gt(xe);
	int ret;

	if (xe_mmio_read32(gt, GU_CNTL_PROTECTED) & DRIVERINT_FLR_DIS) {
		drm_info_once(&xe->drm, "BIOS Disabled Driver-FLR\n");
		return;
	}

	drm_dbg(&xe->drm, "Triggering Driver-FLR\n");

	/*
	 * Make sure any pending FLR requests have cleared by waiting for the
	 * FLR trigger bit to go to zero. Also clear GU_DEBUG's DRIVERFLR_STATUS
	 * to make sure it's not still set from a prior attempt (it's a
	 * write-to-clear bit).
	 * Note that we should never be in a situation where a previous attempt
	 * is still pending (unless the HW is totally dead), but better to be
	 * safe in case something unexpected happens.
	 */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-prepare wait for ready failed! %d\n", ret);
		return;
	}
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);

	/* Trigger the actual Driver-FLR */
	xe_mmio_rmw32(gt, GU_CNTL, 0, DRIVERFLR);

	/* Wait for hardware teardown to complete */
	ret = xe_mmio_wait32(gt, GU_CNTL, DRIVERFLR, 0, flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-teardown wait completion failed! %d\n", ret);
		return;
	}

	/* Wait for hardware/firmware re-init to complete */
	ret = xe_mmio_wait32(gt, GU_DEBUG, DRIVERFLR_STATUS, DRIVERFLR_STATUS,
			     flr_timeout, NULL, false);
	if (ret) {
		drm_err(&xe->drm, "Driver-FLR-reinit wait completion failed! %d\n", ret);
		return;
	}

	/* Clear sticky completion status */
	xe_mmio_write32(gt, GU_DEBUG, DRIVERFLR_STATUS);
}

static void xe_driver_flr_fini(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;

	if (xe->needs_flr_on_fini)
		xe_driver_flr(xe);
}

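/*
 * drmm cleanup action: run xe_gt_sanitize() on every GT when the drm_device
 * is released.
 */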
static void xe_device_sanitize(struct drm_device *drm, void *arg)
{
	struct xe_device *xe = arg;
	struct xe_gt *gt;
	u8 id;

	for_each_gt(gt, xe, id)
		xe_gt_sanitize(gt);
}

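/*
 * Configure DMA for the device: cap the scatter-gather segment size and set
 * both the streaming and coherent DMA masks from the per-platform
 * dma_mask_size.
 */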
static int xe_set_dma_info(struct xe_device *xe)
{
	unsigned int mask_size = xe->info.dma_mask_size;
	int err;

	dma_set_max_seg_size(xe->drm.dev, xe_sg_segment_size(xe->drm.dev));

	err = dma_set_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	err = dma_set_coherent_mask(xe->drm.dev, DMA_BIT_MASK(mask_size));
	if (err)
		goto mask_err;

	return 0;

mask_err:
	drm_err(&xe->drm, "Can't set DMA mask/consistent mask (%d)\n", err);
	return err;
}

/*
 * Initialize MMIO resources that don't require any knowledge about tile count.
 */
int xe_device_probe_early(struct xe_device *xe)
{
	int err;

	err = xe_mmio_init(xe);
	if (err)
		return err;

	err = xe_mmio_root_tile_init(xe);
	if (err)
		return err;

	return 0;
}

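/*
 * On Xe2+ platforms that claim flat CCS support, read the flat CCS base range
 * register (under GT forcewake) to check whether the BIOS actually left flat
 * CCS enabled, and downgrade xe->info.has_flat_ccs if it did not.
 */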
static int xe_device_set_has_flat_ccs(struct xe_device *xe)
{
	u32 reg;
	int err;

	if (GRAPHICS_VER(xe) < 20 || !xe->info.has_flat_ccs)
		return 0;

	struct xe_gt *gt = xe_root_mmio_gt(xe);

	err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
	if (err)
		return err;

	reg = xe_gt_mcr_unicast_read_any(gt, XE2_FLAT_CCS_BASE_RANGE_LOWER);
	xe->info.has_flat_ccs = (reg & XE2_FLAT_CCS_ENABLE);

	if (!xe->info.has_flat_ccs)
		drm_dbg(&xe->drm,
			"Flat CCS has been disabled in BIOS, may lead to performance impact\n");

	return xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}

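/*
 * Main device probe: bring up MMIO, VRAM and GGTT for every tile, the GTs,
 * interrupts, display and the TTM managers, then register the DRM device
 * along with its debugfs and hwmon interfaces. Cleanup of the later stages is
 * handled through drmm actions and the error unwind at the bottom.
 */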
int xe_device_probe(struct xe_device *xe)
{
	struct xe_tile *tile;
	struct xe_gt *gt;
	int err;
	u8 id;

	xe_pat_init_early(xe);

	xe->info.mem_region_mask = 1;
	err = xe_display_init_nommio(xe);
	if (err)
		return err;

	err = xe_set_dma_info(xe);
	if (err)
		return err;

	xe_mmio_probe_tiles(xe);

	xe_ttm_sys_mgr_init(xe);

	for_each_gt(gt, xe, id)
		xe_force_wake_init_gt(gt, gt_to_fw(gt));

	for_each_tile(tile, xe, id) {
		err = xe_ggtt_init_early(tile->mem.ggtt);
		if (err)
			return err;
	}

	err = drmm_add_action_or_reset(&xe->drm, xe_driver_flr_fini, xe);
	if (err)
		return err;

	for_each_gt(gt, xe, id) {
		err = xe_pcode_probe(gt);
		if (err)
			return err;
	}

	err = xe_display_init_noirq(xe);
	if (err)
		return err;

	err = xe_irq_install(xe);
	if (err)
		goto err;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init_early(gt);
		if (err)
			goto err_irq_shutdown;
	}

	err = xe_device_set_has_flat_ccs(xe);
	if (err)
		goto err_irq_shutdown;

	err = xe_mmio_probe_vram(xe);
	if (err)
		goto err_irq_shutdown;

	for_each_tile(tile, xe, id) {
		err = xe_tile_init_noalloc(tile);
		if (err)
			goto err_irq_shutdown;
	}

	/* Allocate and map stolen after potential VRAM resize */
	xe_ttm_stolen_mgr_init(xe);

	/*
	 * Now that GT is initialized (TTM in particular),
	 * we can try to init display, and inherit the initial fb.
	 * This is the reason the first allocation needs to be done
	 * inside display.
	 */
	err = xe_display_init_noaccel(xe);
	if (err)
		goto err_irq_shutdown;

	for_each_gt(gt, xe, id) {
		err = xe_gt_init(gt);
		if (err)
			goto err_irq_shutdown;
	}

	xe_heci_gsc_init(xe);

	err = xe_display_init(xe);
	if (err)
		goto err_irq_shutdown;

	err = drm_dev_register(&xe->drm, 0);
	if (err)
		goto err_fini_display;

	xe_display_register(xe);

	xe_debugfs_register(xe);

	xe_hwmon_register(xe);

	err = drmm_add_action_or_reset(&xe->drm, xe_device_sanitize, xe);
	if (err)
		return err;

	return 0;

err_fini_display:
	xe_display_driver_remove(xe);

err_irq_shutdown:
	xe_irq_shutdown(xe);
err:
	xe_display_fini(xe);
	return err;
}

static void xe_device_remove_display(struct xe_device *xe)
{
	xe_display_unregister(xe);

	drm_dev_unplug(&xe->drm);
	xe_display_driver_remove(xe);
}

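/*
 * Device removal: unregister and unplug display and the DRM device, then tear
 * down display, the HECI GSC interface and interrupts. The remaining state is
 * released through drmm/devm actions when the drm_device goes away.
 */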
void xe_device_remove(struct xe_device *xe)
{
	xe_device_remove_display(xe);

	xe_display_fini(xe);

	xe_heci_gsc_fini(xe);

	xe_irq_shutdown(xe);
}

void xe_device_shutdown(struct xe_device *xe)
{
}

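/*
 * Device write memory barrier: wmb() plus, on discrete GPUs, a dummy MMIO
 * register write, which is needed for the barrier to also cover writes
 * targeting device memory.
 */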
void xe_device_wmb(struct xe_device *xe)
{
	struct xe_gt *gt = xe_root_mmio_gt(xe);

	wmb();
	if (IS_DGFX(xe))
		xe_mmio_write32(gt, SOFTWARE_FLAGS_SPR33, 0);
}

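/*
 * Return the number of CCS bytes needed to back a buffer object of @size
 * bytes when the device uses flat CCS, or 0 otherwise.
 */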
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
	return xe_device_has_flat_ccs(xe) ?
		DIV_ROUND_UP_ULL(size, NUM_BYTES_PER_CCS_BYTE(xe)) : 0;
}

bool xe_device_mem_access_ongoing(struct xe_device *xe)
{
	if (xe_pm_read_callback_task(xe) != NULL)
		return true;

	return atomic_read(&xe->mem_access.ref);
}

void xe_device_assert_mem_access(struct xe_device *xe)
{
	XE_WARN_ON(!xe_device_mem_access_ongoing(xe));
}

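/*
 * Take a mem_access reference only if the device is already runtime-active,
 * without ever triggering a resume. Returns true if access is possible, i.e.
 * a reference was taken or we are already inside a runtime PM callback.
 */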
bool xe_device_mem_access_get_if_ongoing(struct xe_device *xe)
{
	bool active;

	if (xe_pm_read_callback_task(xe) == current)
		return true;

	active = xe_pm_runtime_get_if_active(xe);
	if (active) {
		int ref = atomic_inc_return(&xe->mem_access.ref);

		xe_assert(xe, ref != S32_MAX);
	}

	return active;
}

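/*
 * Take a mem_access reference, resuming the device synchronously through
 * runtime PM if needed. Calls made from within the runtime PM callbacks
 * themselves are a no-op; see the comments below for the locking details.
 */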
void xe_device_mem_access_get(struct xe_device *xe)
{
	int ref;

	/*
	 * This looks racy, but should be fine since the pm_callback_task only
	 * transitions from NULL -> current (and back to NULL again), during the
	 * runtime_resume() or runtime_suspend() callbacks, for which there can
	 * only be a single one running for our device. We only need to prevent
	 * recursively calling the runtime_get or runtime_put from those
	 * callbacks, as well as preventing triggering any access_ongoing
	 * asserts.
	 */
	if (xe_pm_read_callback_task(xe) == current)
		return;

	/*
	 * Since the resume here is synchronous it can be quite easy to deadlock
	 * if we are not careful. Also in practice it might be quite timing
	 * sensitive to ever see the 0 -> 1 transition with the callers locks
	 * held, so deadlocks might exist but are hard for lockdep to ever see.
	 * With this in mind, help lockdep learn about the potentially scary
	 * stuff that can happen inside the runtime_resume callback by acquiring
	 * a dummy lock (it doesn't protect anything and gets compiled out on
	 * non-debug builds). Lockdep then only needs to see the
	 * mem_access_lockdep_map -> runtime_resume callback once, and then can
	 * hopefully validate all the (callers_locks) -> mem_access_lockdep_map.
	 * For example if the (callers_locks) are ever grabbed in the
	 * runtime_resume callback, lockdep should give us a nice splat.
	 */
	lock_map_acquire(&xe_device_mem_access_lockdep_map);
	lock_map_release(&xe_device_mem_access_lockdep_map);

	xe_pm_runtime_get(xe);
	ref = atomic_inc_return(&xe->mem_access.ref);

	xe_assert(xe, ref != S32_MAX);
}

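/*
 * Drop a mem_access reference taken by xe_device_mem_access_get() and release
 * the corresponding runtime PM reference. No-op when called from within the
 * runtime PM callbacks.
 */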
void xe_device_mem_access_put(struct xe_device *xe)
{
	int ref;

	if (xe_pm_read_callback_task(xe) == current)
		return;

	ref = atomic_dec_return(&xe->mem_access.ref);
	xe_pm_runtime_put(xe);

	xe_assert(xe, ref >= 0);
}