Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 | /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #ifndef __IOMMUFD_PRIVATE_H #define __IOMMUFD_PRIVATE_H #include <linux/rwsem.h> #include <linux/xarray.h> #include <linux/refcount.h> #include <linux/uaccess.h> #include <linux/iommu.h> #include <linux/iova_bitmap.h> #include <uapi/linux/iommufd.h> struct iommu_domain; struct iommu_group; struct iommu_option; struct iommufd_device; struct iommufd_ctx { struct file *file; struct xarray objects; struct xarray groups; wait_queue_head_t destroy_wait; u8 account_mode; /* Compatibility with VFIO no iommu */ u8 no_iommu_mode; struct iommufd_ioas *vfio_ioas; }; /* * The IOVA to PFN map. The map automatically copies the PFNs into multiple * domains and permits sharing of PFNs between io_pagetable instances. This * supports both a design where IOAS's are 1:1 with a domain (eg because the * domain is HW customized), or where the IOAS is 1:N with multiple generic * domains. The io_pagetable holds an interval tree of iopt_areas which point * to shared iopt_pages which hold the pfns mapped to the page table. * * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex */ struct io_pagetable { struct rw_semaphore domains_rwsem; struct xarray domains; struct xarray access_list; unsigned int next_domain_id; struct rw_semaphore iova_rwsem; struct rb_root_cached area_itree; /* IOVA that cannot become reserved, struct iopt_allowed */ struct rb_root_cached allowed_itree; /* IOVA that cannot be allocated, struct iopt_reserved */ struct rb_root_cached reserved_itree; u8 disable_large_pages; unsigned long iova_alignment; }; void iopt_init_table(struct io_pagetable *iopt); void iopt_destroy_table(struct io_pagetable *iopt); int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, unsigned long length, struct list_head *pages_list); void iopt_free_pages_list(struct list_head *pages_list); enum { IOPT_ALLOC_IOVA = 1 << 0, }; int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, unsigned long *iova, void __user *uptr, unsigned long length, int iommu_prot, unsigned int flags); int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, unsigned long length, unsigned long *dst_iova, int iommu_prot, unsigned int flags); int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, struct iommu_domain *domain, unsigned long flags, struct iommu_hwpt_get_dirty_bitmap *bitmap); int iopt_set_dirty_tracking(struct io_pagetable *iopt, struct iommu_domain *domain, bool enable); void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, unsigned long length); int iopt_table_add_domain(struct io_pagetable *iopt, struct iommu_domain *domain); void iopt_table_remove_domain(struct io_pagetable *iopt, struct iommu_domain *domain); int iopt_table_enforce_dev_resv_regions(struct io_pagetable *iopt, struct device *dev, phys_addr_t *sw_msi_start); int iopt_set_allow_iova(struct io_pagetable *iopt, struct rb_root_cached *allowed_iova); int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, unsigned long last, void *owner); void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner); int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, size_t num_iovas); void iopt_enable_large_pages(struct io_pagetable *iopt); int iopt_disable_large_pages(struct io_pagetable *iopt); struct iommufd_ucmd { struct iommufd_ctx *ictx; void __user *ubuffer; u32 user_size; void *cmd; }; int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, unsigned long arg); /* Copy the response in ucmd->cmd back to userspace. */ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, size_t cmd_len) { if (copy_to_user(ucmd->ubuffer, ucmd->cmd, min_t(size_t, ucmd->user_size, cmd_len))) return -EFAULT; return 0; } enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_DEVICE, IOMMUFD_OBJ_HWPT_PAGING, IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, #ifdef CONFIG_IOMMUFD_TEST IOMMUFD_OBJ_SELFTEST, #endif IOMMUFD_OBJ_MAX, }; /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { refcount_t shortterm_users; refcount_t users; enum iommufd_object_type type; unsigned int id; }; static inline bool iommufd_lock_obj(struct iommufd_object *obj) { if (!refcount_inc_not_zero(&obj->users)) return false; if (!refcount_inc_not_zero(&obj->shortterm_users)) { /* * If the caller doesn't already have a ref on obj this must be * called under the xa_lock. Otherwise the caller is holding a * ref on users. Thus it cannot be one before this decrement. */ refcount_dec(&obj->users); return false; } return true; } struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, enum iommufd_object_type type); static inline void iommufd_put_object(struct iommufd_ctx *ictx, struct iommufd_object *obj) { /* * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees * a spurious !0 users with a 0 shortterm_users. */ refcount_dec(&obj->users); if (refcount_dec_and_test(&obj->shortterm_users)) wake_up_interruptible_all(&ictx->destroy_wait); } void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); enum { REMOVE_WAIT_SHORTTERM = 1, }; int iommufd_object_remove(struct iommufd_ctx *ictx, struct iommufd_object *to_destroy, u32 id, unsigned int flags); /* * The caller holds a users refcount and wants to destroy the object. At this * point the caller has no shortterm_users reference and at least the xarray * will be holding one. */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { int ret; ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); /* * If there is a bug and we couldn't destroy the object then we did put * back the caller's users refcount and will eventually try to free it * again during close. */ WARN_ON(ret); } /* * The HWPT allocated by autodomains is used in possibly many devices and * is automatically destroyed when its refcount reaches zero. * * If userspace uses the HWPT manually, even for a short term, then it will * disrupt this refcounting and the auto-free in the kernel will not work. * Userspace that tries to use the automatically allocated HWPT must be careful * to ensure that it is consistently destroyed, eg by not racing accesses * and by not attaching an automatic HWPT to a device manually. */ static inline void iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj) { iommufd_object_remove(ictx, obj, obj->id, 0); } struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type); #define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ offsetof(typeof(*(ptr)), \ obj) != 0), \ type), \ typeof(*(ptr)), obj) #define iommufd_object_alloc(ictx, ptr, type) \ __iommufd_object_alloc(ictx, ptr, type, obj) /* * The IO Address Space (IOAS) pagetable is a virtual page table backed by the * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The * mapping is copied into all of the associated domains and made available to * in-kernel users. * * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable * object. When we go to attach a device to an IOAS we need to get an * iommu_domain and wrapping iommufd_hw_pagetable for it. * * An iommu_domain & iommfd_hw_pagetable will be automatically selected * for a device based on the hwpt_list. If no suitable iommu_domain * is found a new iommu_domain will be created. */ struct iommufd_ioas { struct iommufd_object obj; struct io_pagetable iopt; struct mutex mutex; struct list_head hwpt_list; }; static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ctx *ictx, u32 id) { return container_of(iommufd_get_object(ictx, id, IOMMUFD_OBJ_IOAS), struct iommufd_ioas, obj); } struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx); int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd); void iommufd_ioas_destroy(struct iommufd_object *obj); int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); int iommufd_ioas_map(struct iommufd_ucmd *ucmd); int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); int iommufd_ioas_option(struct iommufd_ucmd *ucmd); int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); int iommufd_check_iova_range(struct io_pagetable *iopt, struct iommu_hwpt_get_dirty_bitmap *bitmap); /* * A HW pagetable is called an iommu_domain inside the kernel. This user object * allows directly creating and inspecting the domains. Domains that have kernel * owned page tables will be associated with an iommufd_ioas that provides the * IOVA to PFN map. */ struct iommufd_hw_pagetable { struct iommufd_object obj; struct iommu_domain *domain; }; struct iommufd_hwpt_paging { struct iommufd_hw_pagetable common; struct iommufd_ioas *ioas; bool auto_domain : 1; bool enforce_cache_coherency : 1; bool msi_cookie : 1; bool nest_parent : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; }; struct iommufd_hwpt_nested { struct iommufd_hw_pagetable common; struct iommufd_hwpt_paging *parent; }; static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) { return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING; } static inline struct iommufd_hwpt_paging * to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) { return container_of(hwpt, struct iommufd_hwpt_paging, common); } static inline struct iommufd_hwpt_paging * iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_HWPT_PAGING), struct iommufd_hwpt_paging, common.obj); } static inline struct iommufd_hw_pagetable * iommufd_get_hwpt_nested(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_HWPT_NESTED), struct iommufd_hw_pagetable, obj); } int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); struct iommufd_hwpt_paging * iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct iommufd_device *idev, u32 flags, bool immediate_attach, const struct iommu_user_data *user_data); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev); void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); void iommufd_hwpt_paging_abort(struct iommufd_object *obj); void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); void iommufd_hwpt_nested_abort(struct iommufd_object *obj); int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd); static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt) { if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) { struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt); lockdep_assert_not_held(&hwpt_paging->ioas->mutex); if (hwpt_paging->auto_domain) { iommufd_object_put_and_try_destroy(ictx, &hwpt->obj); return; } } refcount_dec(&hwpt->obj.users); } struct iommufd_group { struct kref ref; struct mutex lock; struct iommufd_ctx *ictx; struct iommu_group *group; struct iommufd_hw_pagetable *hwpt; struct list_head device_list; phys_addr_t sw_msi_start; }; /* * A iommufd_device object represents the binding relationship between a * consuming driver and the iommufd. These objects are created/destroyed by * external drivers, not by userspace. */ struct iommufd_device { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_group *igroup; struct list_head group_item; /* always the physical device */ struct device *dev; bool enforce_cache_coherency; }; static inline struct iommufd_device * iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) { return container_of(iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_DEVICE), struct iommufd_device, obj); } void iommufd_device_destroy(struct iommufd_object *obj); int iommufd_get_hw_info(struct iommufd_ucmd *ucmd); struct iommufd_access { struct iommufd_object obj; struct iommufd_ctx *ictx; struct iommufd_ioas *ioas; struct iommufd_ioas *ioas_unpin; struct mutex ioas_lock; const struct iommufd_access_ops *ops; void *data; unsigned long iova_alignment; u32 iopt_access_list_id; }; int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); void iopt_remove_access(struct io_pagetable *iopt, struct iommufd_access *access, u32 iopt_access_list_id); void iommufd_access_destroy_object(struct iommufd_object *obj); #ifdef CONFIG_IOMMUFD_TEST int iommufd_test(struct iommufd_ucmd *ucmd); void iommufd_selftest_destroy(struct iommufd_object *obj); extern size_t iommufd_test_memory_limit; void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags); bool iommufd_should_fail(void); int __init iommufd_test_init(void); void iommufd_test_exit(void); bool iommufd_selftest_is_mock_dev(struct device *dev); #else static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, unsigned int ioas_id, u64 *iova, u32 *flags) { } static inline bool iommufd_should_fail(void) { return false; } static inline int __init iommufd_test_init(void) { return 0; } static inline void iommufd_test_exit(void) { } static inline bool iommufd_selftest_is_mock_dev(struct device *dev) { return false; } #endif #endif |