// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
#include "xe_device.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION);

	return __xe_bb_create_job(q, bb, addr);
}

struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION));
	return __xe_bb_create_job(q, bb, &addr);
}

void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}