/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

struct i915_mm_struct {
	struct mm_struct *mm;
	struct drm_i915_private *i915;
	struct i915_mmu_notifier *mn;
	struct hlist_node node;
	struct kref kref;
	struct rcu_work work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

struct i915_mmu_notifier {
	spinlock_t lock;
	struct hlist_node node;
	struct mmu_notifier mn;
	struct rb_root_cached objects;
	struct i915_mm_struct *mm;
};

struct i915_mmu_object {
	struct i915_mmu_notifier *mn;
	struct drm_i915_gem_object *obj;
	struct interval_tree_node it;
};

static void add_object(struct i915_mmu_object *mo)
{
	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
	interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
	if (RB_EMPTY_NODE(&mo->it.rb))
		return;

	interval_tree_remove(&mo->it, &mo->mn->objects);
	RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
	struct i915_mmu_object *mo = obj->userptr.mmu_object;

	/*
	 * During mm_invalidate_range we need to cancel any userptr that
	 * overlaps the range being invalidated. Doing so requires the
	 * struct_mutex, and that risks recursion. In order to cause
	 * recursion, the user must alias the userptr address space with
	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
	 * to invalidate that mmapping, mm_invalidate_range is called with
	 * the userptr address *and* the struct_mutex held. To prevent that
	 * we set a flag under the i915_mmu_notifier spinlock to indicate
	 * whether this object is valid.
	 */
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	if (value)
		add_object(mo);
	else
		del_object(mo);
	spin_unlock(&mo->mn->lock);
}

static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
				  const struct mmu_notifier_range *range)
{
	struct i915_mmu_notifier *mn =
		container_of(_mn, struct i915_mmu_notifier, mn);
	struct interval_tree_node *it;
	unsigned long end;
	int ret = 0;

	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
		return 0;

	/* interval ranges are inclusive, but invalidate range is exclusive */
	end = range->end - 1;

	spin_lock(&mn->lock);
	it = interval_tree_iter_first(&mn->objects, range->start, end);
	while (it) {
		struct drm_i915_gem_object *obj;

		if (!mmu_notifier_range_blockable(range)) {
			ret = -EAGAIN;
			break;
		}

		/*
		 * The mmu_object is released late when destroying the
		 * GEM object so it is entirely possible to gain a
		 * reference on an object in the process of being freed
		 * since our serialisation is via the spinlock and not
		 * the struct_mutex - and consequently use it after it
		 * is freed and then double free it. To prevent that
		 * use-after-free we only acquire a reference on the
		 * object if it is not in the process of being destroyed.
		 */
		obj = container_of(it, struct i915_mmu_object, it)->obj;
		if (!kref_get_unless_zero(&obj->base.refcount)) {
			it = interval_tree_iter_next(it, range->start, end);
			continue;
		}
		spin_unlock(&mn->lock);

		ret = i915_gem_object_unbind(obj,
					     I915_GEM_OBJECT_UNBIND_ACTIVE |
					     I915_GEM_OBJECT_UNBIND_BARRIER);
		if (ret == 0)
			ret = __i915_gem_object_put_pages(obj);
		i915_gem_object_put(obj);
		if (ret)
			return ret;

		spin_lock(&mn->lock);

		/*
		 * As we do not (yet) protect the mmu from concurrent insertion
		 * over this range, there is no guarantee that this search will
		 * terminate given a pathologic workload.
		 */
		it = interval_tree_iter_first(&mn->objects, range->start, end);
	}
	spin_unlock(&mn->lock);

	return ret;
}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
	.invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;

	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
	if (mn == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&mn->lock);
	mn->mn.ops = &i915_gem_userptr_notifier;
	mn->objects = RB_ROOT_CACHED;
	mn->mm = mm;

	return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
	struct i915_mmu_object *mo;

	mo = fetch_and_zero(&obj->userptr.mmu_object);
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	del_object(mo);
	spin_unlock(&mo->mn->lock);
	kfree(mo);
}

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn, *old;
	int err;

	mn = READ_ONCE(mm->mn);
	if (likely(mn))
		return mn;

	mn = i915_mmu_notifier_create(mm);
	if (IS_ERR(mn))
		return mn;

	err = mmu_notifier_register(&mn->mn, mm->mm);
	if (err) {
		kfree(mn);
		return ERR_PTR(err);
	}

	old = cmpxchg(&mm->mn, NULL, mn);
	if (old) {
		mmu_notifier_unregister(&mn->mn, mm->mm);
		kfree(mn);
		mn = old;
	}

	return mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	struct i915_mmu_notifier *mn;
	struct i915_mmu_object *mo;

	if (flags & I915_USERPTR_UNSYNCHRONIZED)
		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

	if (GEM_WARN_ON(!obj->userptr.mm))
		return -EINVAL;

	mn = i915_mmu_notifier_find(obj->userptr.mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);

	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
	if (!mo)
		return -ENOMEM;

	mo->mn = mn;
	mo->obj = obj;
	mo->it.start = obj->userptr.ptr;
	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
	RB_CLEAR_NODE(&mo->it.rb);

	obj->userptr.mmu_object = mo;
	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
	if (mn == NULL)
		return;

	mmu_notifier_unregister(&mn->mn, mm);
	kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
		return -ENODEV;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
}

#endif

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *i915, struct mm_struct *real)
{
	struct i915_mm_struct *it, *mm = NULL;

	rcu_read_lock();
	hash_for_each_possible_rcu(i915->mm_structs,
				   it, node,
				   (unsigned long)real)
		if (it->mm == real && kref_get_unless_zero(&it->kref)) {
			mm = it;
			break;
		}
	rcu_read_unlock();

	return mm;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_mm_struct *mm, *new;
	int ret = 0;

	/* During release of the GEM object we hold the struct_mutex. This
	 * precludes us from calling mmput() at that time as that may be
	 * the last reference and so call exit_mmap(). exit_mmap() will
	 * attempt to reap the vma, and if we were holding a GTT mmap
	 * would then call drm_gem_vm_close() and attempt to reacquire
	 * the struct mutex. So in order to avoid that recursion, we have
	 * to defer releasing the mm reference until after we drop the
	 * struct_mutex, i.e. we need to schedule a worker to do the clean
	 * up.
	 */
	mm = __i915_mm_struct_find(i915, current->mm);
	if (mm)
		goto out;

	new = kmalloc(sizeof(*mm), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	kref_init(&new->kref);
	new->i915 = to_i915(obj->base.dev);
	new->mm = current->mm;
	new->mn = NULL;

	spin_lock(&i915->mm_lock);
	mm = __i915_mm_struct_find(i915, current->mm);
	if (!mm) {
		hash_add_rcu(i915->mm_structs,
			     &new->node,
			     (unsigned long)new->mm);
		mmgrab(current->mm);
		mm = new;
	}
	spin_unlock(&i915->mm_lock);
	if (mm != new)
		kfree(new);

out:
	obj->userptr.mm = mm;
	return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work.work);

	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}

static void
__i915_mm_struct_free(struct kref *kref)
{
	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

	spin_lock(&mm->i915->mm_lock);
	hash_del_rcu(&mm->node);
	spin_unlock(&mm->i915->mm_lock);

	INIT_RCU_WORK(&mm->work, __i915_mm_struct_free__worker);
	queue_rcu_work(system_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mm == NULL)
		return;

	kref_put(&obj->userptr.mm->kref, __i915_mm_struct_free);
	obj->userptr.mm = NULL;
}

struct get_pages_work {
	struct work_struct work;
	struct drm_i915_gem_object *obj;
	struct task_struct *task;
};

static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
			       struct page **pvec, unsigned long num_pages)
{
	unsigned int max_segment = i915_sg_segment_size();
	struct sg_table *st;
	unsigned int sg_page_sizes;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

alloc_table:
	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
					  0, num_pages << PAGE_SHIFT,
					  max_segment,
					  GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		kfree(st);
		return ERR_PTR(ret);
	}

	sg_page_sizes = i915_sg_page_sizes(st->sgl);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
	struct get_pages_work *work = container_of(_work, typeof(*work), work);
	struct drm_i915_gem_object *obj = work->obj;
	const unsigned long npages = obj->base.size >> PAGE_SHIFT;
	unsigned long pinned;
	struct page **pvec;
	int ret;

	ret = -ENOMEM;
	pinned = 0;

	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (pvec != NULL) {
		struct mm_struct *mm = obj->userptr.mm->mm;
		unsigned int flags = 0;
		int locked = 0;

		if (!i915_gem_object_is_readonly(obj))
			flags |= FOLL_WRITE;

		ret = -EFAULT;
		if (mmget_not_zero(mm)) {
			while (pinned < npages) {
				if (!locked) {
					mmap_read_lock(mm);
					locked = 1;
				}
				ret = pin_user_pages_remote
					(mm,
					 obj->userptr.ptr + pinned * PAGE_SIZE,
					 npages - pinned,
					 flags,
					 pvec + pinned, NULL, &locked);
				if (ret < 0)
					break;

				pinned += ret;
			}
			if (locked)
				mmap_read_unlock(mm);
			mmput(mm);
		}
	}

	mutex_lock_nested(&obj->mm.lock, I915_MM_GET_PAGES);
	if (obj->userptr.work == &work->work) {
		struct sg_table *pages = ERR_PTR(ret);

		if (pinned == npages) {
			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
							       npages);
			if (!IS_ERR(pages)) {
				pinned = 0;
				pages = NULL;
			}
		}

		obj->userptr.work = ERR_CAST(pages);
		if (IS_ERR(pages))
			__i915_gem_userptr_set_active(obj, false);
	}
	mutex_unlock(&obj->mm.lock);

	unpin_user_pages(pvec, pinned);
	kvfree(pvec);

	i915_gem_object_put(obj);
	put_task_struct(work->task);
	kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
	struct get_pages_work *work;

	/* Spawn a worker so that we can acquire the
	 * user pages without holding our mutex. Access
	 * to the user pages requires mmap_lock, and we have
	 * a strict lock ordering of mmap_lock, struct_mutex -
	 * we already hold struct_mutex here and so cannot
	 * call gup without encountering a lock inversion.
	 *
	 * Userspace will keep on repeating the operation
	 * (thanks to EAGAIN) until either we hit the fast
	 * path or the worker completes. If the worker is
	 * cancelled or superseded, the task is still run
	 * but the results ignored. (This leads to
	 * complications that we may have a stray object
	 * refcount that we need to be wary of when
	 * checking for existing objects during creation.)
	 * If the worker encounters an error, it reports
	 * that error back to this function through
	 * obj->userptr.work = ERR_PTR.
	 */
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL)
		return ERR_PTR(-ENOMEM);

	obj->userptr.work = &work->work;

	work->obj = i915_gem_object_get(obj);

	work->task = current;
	get_task_struct(work->task);

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

	return ERR_PTR(-EAGAIN);
}

static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
	struct mm_struct *mm = obj->userptr.mm->mm;
	struct page **pvec;
	struct sg_table *pages;
	bool active;
	int pinned;
	unsigned int gup_flags = 0;

	/* If userspace should engineer that these pages are replaced in
	 * the vma between us binding this page into the GTT and completion
	 * of rendering... Their loss. If they change the mapping of their
	 * pages they need to create a new bo to point to the new vma.
	 *
	 * However, that still leaves open the possibility of the vma
	 * being copied upon fork. Which falls under the same userspace
	 * synchronisation issue as a regular bo, except that this time
	 * the process may not be expecting that a particular piece of
	 * memory is tied to the GPU.
	 *
	 * Fortunately, we can hook into the mmu_notifier in order to
	 * discard the page references prior to anything nasty happening
	 * to the vma (discard or cloning) which should prevent the more
	 * egregious cases from causing harm.
	 */

	if (obj->userptr.work) {
		/* active flag should still be held for the pending work */
		if (IS_ERR(obj->userptr.work))
			return PTR_ERR(obj->userptr.work);
		else
			return -EAGAIN;
	}

	pvec = NULL;
	pinned = 0;

	if (mm == current->mm) {
		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
				      GFP_KERNEL |
				      __GFP_NORETRY |
				      __GFP_NOWARN);
		if (pvec) {
			/* defer to worker if malloc fails */
			if (!i915_gem_object_is_readonly(obj))
				gup_flags |= FOLL_WRITE;
			pinned = pin_user_pages_fast_only(obj->userptr.ptr,
							  num_pages, gup_flags,
							  pvec);
		}
	}

	active = false;
	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	} else if (pinned < num_pages) {
		pages = __i915_gem_userptr_get_pages_schedule(obj);
		active = pages == ERR_PTR(-EAGAIN);
	} else {
		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
		active = !IS_ERR(pages);
	}
	if (active)
		__i915_gem_userptr_set_active(obj, true);

	if (IS_ERR(pages))
		unpin_user_pages(pvec, pinned);
	kvfree(pvec);

	return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	/*
	 * We always mark objects as dirty when they are used by the GPU,
	 * just in case. However, if we set the vma as being read-only we know
	 * that the object will never have been written to.
	 */
	if (i915_gem_object_is_readonly(obj))
		obj->mm.dirty = false;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty && trylock_page(page)) {
			/*
			 * As this may not be anonymous memory (e.g. shmem)
			 * but exist on a real mapping, we have to lock
			 * the page in order to dirty it -- holding
			 * the page reference is not sufficient to
			 * prevent the inode from being truncated.
			 * Play safe and take the lock.
			 *
			 * However...!
			 *
			 * The mmu-notifier can be invalidated for a
			 * migrate_page, that is already holding the lock
			 * on the page. Such a try_to_unmap() will result
			 * in us calling put_pages() and so recursively try
			 * to lock the page. We avoid that deadlock with
			 * a trylock_page() and in exchange we risk missing
			 * some page dirtying.
			 */
			set_page_dirty(page);
			unlock_page(page);
		}

		mark_page_accessed(page);
		unpin_user_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mmu_object)
		return 0;

	return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.name = "i915_gem_object_userptr",
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_NO_MMAP |
		 I915_GEM_OBJECT_ASYNC_CANCEL,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned back to the system
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note, that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	static struct lock_class_key lock_class;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
		/* We cannot support coherent userptr objects on hw without
		 * LLC and broken snooping.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED))
		return -EINVAL;

	/*
	 * XXX: There is a prevalence of the assumption that we fit the
	 * object's page count inside a 32bit _signed_ variable. Let's document
	 * this and catch if we ever need to fix it. In the meantime, if you do
	 * spot such a local variable, please consider fixing!
	 *
	 * Aside from our own locals (for which we have no excuse!):
	 * - sg_table embeds unsigned int for num_pages
	 * - get_user_pages*() mixed ints with longs
	 */

	if (args->user_size >> PAGE_SHIFT > INT_MAX)
		return -E2BIG;

	if (overflows_type(args->user_size, obj->base.size))
		return -E2BIG;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		if (!dev_priv->gt.vm->has_read_only)
			return -ENODEV;
	}

	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class);
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mm_struct(obj);
	if (ret == 0)
		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}
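
The block comment above i915_gem_userptr_ioctl() describes the userspace contract: hand the kernel a page-aligned pointer and size, get back a GEM handle. As a rough illustration of that contract only (this snippet is not part of the kernel source; the render-node path, the allocation size and the minimal error handling are assumptions), a caller might do something like:

/*
 * Hedged userspace sketch: wrap an ordinary page-aligned allocation in a GEM
 * handle via DRM_IOCTL_I915_GEM_USERPTR. Device path and sizes are
 * illustrative; real code should query the page size and close the handle
 * (DRM_IOCTL_GEM_CLOSE) before freeing the memory.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
	const size_t size = 16 * 4096;	/* must be a multiple of the page size */
	struct drm_i915_gem_userptr arg;
	void *ptr = NULL;
	int fd;

	fd = open("/dev/dri/renderD128", O_RDWR);	/* assumed render node */
	if (fd < 0)
		return 1;

	/* Restriction 1 above: both pointer and size must be page aligned. */
	if (posix_memalign(&ptr, 4096, size))
		return 1;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = 0;	/* or I915_USERPTR_READ_ONLY, if the hw supports it */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
		perror("DRM_IOCTL_I915_GEM_USERPTR");
		return 1;
	}

	printf("handle %u wraps %zu bytes at %p\n", arg.handle, size, ptr);

	/*
	 * The pages remain owned by the process; keep the mapping alive for
	 * as long as the GPU may use the handle (see the caveats above about
	 * free()/munmap() while work is outstanding).
	 */
	close(fd);
	return 0;
}

Note that with flags == 0 the object is backed by the mmu-notifier path implemented in this file; I915_USERPTR_UNSYNCHRONIZED bypasses it and is restricted to CAP_SYS_ADMIN, as i915_gem_userptr_init__mmu_notifier() shows.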

int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
	spin_lock_init(&dev_priv->mm_lock);
	hash_init(dev_priv->mm_structs);

	dev_priv->mm.userptr_wq =
		alloc_workqueue("i915-userptr-acquire",
				WQ_HIGHPRI | WQ_UNBOUND,
				0);
	if (!dev_priv->mm.userptr_wq)
		return -ENOMEM;

	return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
	destroy_workqueue(dev_priv->mm.userptr_wq);
}