/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2012-2014 Intel Corporation
 */

#include <linux/mmu_context.h>
#include <linux/mmu_notifier.h>
#include <linux/mempolicy.h>
#include <linux/swap.h>
#include <linux/sched/mm.h>

#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"

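/*
 * One i915_mm_struct is kept per client mm_struct that owns userptr
 * objects. It is looked up by mm in dev_priv->mm_structs, refcounted
 * via kref, and its final teardown (dropping the mm reference and the
 * mmu notifier) is deferred to a worker, see __i915_mm_struct_free().
 */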
struct i915_mm_struct {
	struct mm_struct *mm;
	struct drm_i915_private *i915;
	struct i915_mmu_notifier *mn;
	struct hlist_node node;
	struct kref kref;
	struct work_struct work;
};

#if defined(CONFIG_MMU_NOTIFIER)
#include <linux/interval_tree.h>

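/*
 * Per-mm notifier state: a single mmu_notifier registered on the client
 * mm, plus a spinlock-protected interval tree of the userptr objects
 * currently backed by pages, keyed by their user address range.
 */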
struct i915_mmu_notifier {
	spinlock_t lock;
	struct hlist_node node;
	struct mmu_notifier mn;
	struct rb_root_cached objects;
	struct i915_mm_struct *mm;
};

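/*
 * One tracking node per userptr object: the interval_tree_node records
 * the object's [ptr, ptr + size - 1] range so that an invalidation can
 * find every overlapping object under mn->lock.
 */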
struct i915_mmu_object {
	struct i915_mmu_notifier *mn;
	struct drm_i915_gem_object *obj;
	struct interval_tree_node it;
};

static void add_object(struct i915_mmu_object *mo)
{
	GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
	interval_tree_insert(&mo->it, &mo->mn->objects);
}

static void del_object(struct i915_mmu_object *mo)
{
	if (RB_EMPTY_NODE(&mo->it.rb))
		return;

	interval_tree_remove(&mo->it, &mo->mn->objects);
	RB_CLEAR_NODE(&mo->it.rb);
}

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
	struct i915_mmu_object *mo = obj->userptr.mmu_object;

	/*
	 * During mm_invalidate_range we need to cancel any userptr that
	 * overlaps the range being invalidated. Doing so requires the
	 * struct_mutex, and that risks recursion. In order to cause
	 * recursion, the user must alias the userptr address space with
	 * a GTT mmapping (possible with a MAP_FIXED) - then when we have
	 * to invalidate that mmapping, mm_invalidate_range is called with
	 * the userptr address *and* the struct_mutex held. To prevent that
	 * we set a flag under the i915_mmu_notifier spinlock to indicate
	 * whether this object is valid.
	 */
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	if (value)
		add_object(mo);
	else
		del_object(mo);
	spin_unlock(&mo->mn->lock);
}

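/*
 * Called when any part of the client address space covering our userptr
 * objects is about to be invalidated. Every overlapping object is
 * unbound and has its backing pages released; if the invalidation is
 * non-blockable we bail out with -EAGAIN instead of sleeping on
 * struct_mutex.
 */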
static int
userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
				  const struct mmu_notifier_range *range)
{
	struct i915_mmu_notifier *mn =
		container_of(_mn, struct i915_mmu_notifier, mn);
	struct interval_tree_node *it;
	struct mutex *unlock = NULL;
	unsigned long end;
	int ret = 0;

	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
		return 0;

	/* interval ranges are inclusive, but invalidate range is exclusive */
	end = range->end - 1;

	spin_lock(&mn->lock);
	it = interval_tree_iter_first(&mn->objects, range->start, end);
	while (it) {
		struct drm_i915_gem_object *obj;

		if (!mmu_notifier_range_blockable(range)) {
			ret = -EAGAIN;
			break;
		}

		/*
		 * The mmu_object is released late when destroying the
		 * GEM object so it is entirely possible to gain a
		 * reference on an object in the process of being freed
		 * since our serialisation is via the spinlock and not
		 * the struct_mutex - and consequently use it after it
		 * is freed and then double free it. To prevent that
		 * use-after-free we only acquire a reference on the
		 * object if it is not in the process of being destroyed.
		 */
		obj = container_of(it, struct i915_mmu_object, it)->obj;
		if (!kref_get_unless_zero(&obj->base.refcount)) {
			it = interval_tree_iter_next(it, range->start, end);
			continue;
		}
		spin_unlock(&mn->lock);

		if (!unlock) {
			unlock = &mn->mm->i915->drm.struct_mutex;

			switch (mutex_trylock_recursive(unlock)) {
			default:
			case MUTEX_TRYLOCK_FAILED:
				if (mutex_lock_killable_nested(unlock, I915_MM_SHRINKER)) {
					i915_gem_object_put(obj);
					return -EINTR;
				}
				/* fall through */
			case MUTEX_TRYLOCK_SUCCESS:
				break;

			case MUTEX_TRYLOCK_RECURSIVE:
				unlock = ERR_PTR(-EEXIST);
				break;
			}
		}

		ret = i915_gem_object_unbind(obj,
					     I915_GEM_OBJECT_UNBIND_ACTIVE);
		if (ret == 0)
			ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
		i915_gem_object_put(obj);
		if (ret)
			goto unlock;

		spin_lock(&mn->lock);

		/*
		 * As we do not (yet) protect the mmu from concurrent insertion
		 * over this range, there is no guarantee that this search will
		 * terminate given a pathologic workload.
		 */
		it = interval_tree_iter_first(&mn->objects, range->start, end);
	}
	spin_unlock(&mn->lock);

unlock:
	if (!IS_ERR_OR_NULL(unlock))
		mutex_unlock(unlock);

	return ret;

}

static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
	.invalidate_range_start = userptr_mn_invalidate_range_start,
};

static struct i915_mmu_notifier *
i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;

	mn = kmalloc(sizeof(*mn), GFP_KERNEL);
	if (mn == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&mn->lock);
	mn->mn.ops = &i915_gem_userptr_notifier;
	mn->objects = RB_ROOT_CACHED;
	mn->mm = mm;

	return mn;
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
	struct i915_mmu_object *mo;

	mo = fetch_and_zero(&obj->userptr.mmu_object);
	if (!mo)
		return;

	spin_lock(&mo->mn->lock);
	del_object(mo);
	spin_unlock(&mo->mn->lock);
	kfree(mo);
}

static struct i915_mmu_notifier *
i915_mmu_notifier_find(struct i915_mm_struct *mm)
{
	struct i915_mmu_notifier *mn;
	int err = 0;

	mn = mm->mn;
	if (mn)
		return mn;

	mn = i915_mmu_notifier_create(mm);
	if (IS_ERR(mn))
		err = PTR_ERR(mn);

	down_write(&mm->mm->mmap_sem);
	mutex_lock(&mm->i915->mm_lock);
	if (mm->mn == NULL && !err) {
		/* Protected by mmap_sem (write-lock) */
		err = __mmu_notifier_register(&mn->mn, mm->mm);
		if (!err) {
			/* Protected by mm_lock */
			mm->mn = fetch_and_zero(&mn);
		}
	} else if (mm->mn) {
		/*
		 * Someone else raced and successfully installed the mmu
		 * notifier, so we can cancel our own errors.
		 */
		err = 0;
	}
	mutex_unlock(&mm->i915->mm_lock);
	up_write(&mm->mm->mmap_sem);

	if (mn && !IS_ERR(mn))
		kfree(mn);

	return err ? ERR_PTR(err) : mm->mn;
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	struct i915_mmu_notifier *mn;
	struct i915_mmu_object *mo;

	if (flags & I915_USERPTR_UNSYNCHRONIZED)
		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

	if (WARN_ON(obj->userptr.mm == NULL))
		return -EINVAL;

	mn = i915_mmu_notifier_find(obj->userptr.mm);
	if (IS_ERR(mn))
		return PTR_ERR(mn);

	mo = kzalloc(sizeof(*mo), GFP_KERNEL);
	if (!mo)
		return -ENOMEM;

	mo->mn = mn;
	mo->obj = obj;
	mo->it.start = obj->userptr.ptr;
	mo->it.last = obj->userptr.ptr + obj->base.size - 1;
	RB_CLEAR_NODE(&mo->it.rb);

	obj->userptr.mmu_object = mo;
	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
	if (mn == NULL)
		return;

	mmu_notifier_unregister(&mn->mn, mm);
	kfree(mn);
}

#else

static void
__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
{
}

static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
}

static int
i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
				    unsigned flags)
{
	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
		return -ENODEV;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}

static void
i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
		       struct mm_struct *mm)
{
}

#endif

static struct i915_mm_struct *
__i915_mm_struct_find(struct drm_i915_private *dev_priv, struct mm_struct *real)
{
	struct i915_mm_struct *mm;

	/* Protected by dev_priv->mm_lock */
	hash_for_each_possible(dev_priv->mm_structs, mm, node, (unsigned long)real)
		if (mm->mm == real)
			return mm;

	return NULL;
}

static int
i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct i915_mm_struct *mm;
	int ret = 0;

	/* During release of the GEM object we hold the struct_mutex. This
	 * precludes us from calling mmput() at that time as that may be
	 * the last reference and so call exit_mmap(). exit_mmap() will
	 * attempt to reap the vma, and if we were holding a GTT mmap
	 * would then call drm_gem_vm_close() and attempt to reacquire
	 * the struct mutex. So in order to avoid that recursion, we have
	 * to defer releasing the mm reference until after we drop the
	 * struct_mutex, i.e. we need to schedule a worker to do the clean
	 * up.
	 */
	mutex_lock(&dev_priv->mm_lock);
	mm = __i915_mm_struct_find(dev_priv, current->mm);
	if (mm == NULL) {
		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
		if (mm == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		kref_init(&mm->kref);
		mm->i915 = to_i915(obj->base.dev);

		mm->mm = current->mm;
		mmgrab(current->mm);

		mm->mn = NULL;

		/* Protected by dev_priv->mm_lock */
		hash_add(dev_priv->mm_structs,
			 &mm->node, (unsigned long)mm->mm);
	} else
		kref_get(&mm->kref);

	obj->userptr.mm = mm;
out:
	mutex_unlock(&dev_priv->mm_lock);
	return ret;
}

static void
__i915_mm_struct_free__worker(struct work_struct *work)
{
	struct i915_mm_struct *mm = container_of(work, typeof(*mm), work);
	i915_mmu_notifier_free(mm->mn, mm->mm);
	mmdrop(mm->mm);
	kfree(mm);
}

static void
__i915_mm_struct_free(struct kref *kref)
{
	struct i915_mm_struct *mm = container_of(kref, typeof(*mm), kref);

	/* Protected by dev_priv->mm_lock */
	hash_del(&mm->node);
	mutex_unlock(&mm->i915->mm_lock);

	INIT_WORK(&mm->work, __i915_mm_struct_free__worker);
	queue_work(mm->i915->mm.userptr_wq, &mm->work);
}

static void
i915_gem_userptr_release__mm_struct(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mm == NULL)
		return;

	kref_put_mutex(&obj->userptr.mm->kref,
		       __i915_mm_struct_free,
		       &to_i915(obj->base.dev)->mm_lock);
	obj->userptr.mm = NULL;
}

struct get_pages_work {
	struct work_struct work;
	struct drm_i915_gem_object *obj;
	struct task_struct *task;
};

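/*
 * Build an sg_table for the pinned user pages and install it on the
 * object. If DMA-mapping the large coalesced segments fails, retry with
 * PAGE_SIZE segments before giving up.
 */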
static struct sg_table *
__i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
			       struct page **pvec, int num_pages)
{
	unsigned int max_segment = i915_sg_segment_size();
	struct sg_table *st;
	unsigned int sg_page_sizes;
	int ret;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		return ERR_PTR(-ENOMEM);

alloc_table:
	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
					  0, num_pages << PAGE_SHIFT,
					  max_segment,
					  GFP_KERNEL);
	if (ret) {
		kfree(st);
		return ERR_PTR(ret);
	}

	ret = i915_gem_gtt_prepare_pages(obj, st);
	if (ret) {
		sg_free_table(st);

		if (max_segment > PAGE_SIZE) {
			max_segment = PAGE_SIZE;
			goto alloc_table;
		}

		kfree(st);
		return ERR_PTR(ret);
	}

	sg_page_sizes = i915_sg_page_sizes(st->sgl);

	__i915_gem_object_set_pages(obj, st, sg_page_sizes);

	return st;
}

static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
	struct get_pages_work *work = container_of(_work, typeof(*work), work);
	struct drm_i915_gem_object *obj = work->obj;
	const int npages = obj->base.size >> PAGE_SHIFT;
	struct page **pvec;
	int pinned, ret;

	ret = -ENOMEM;
	pinned = 0;

	pvec = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
	if (pvec != NULL) {
		struct mm_struct *mm = obj->userptr.mm->mm;
		unsigned int flags = 0;

		if (!i915_gem_object_is_readonly(obj))
			flags |= FOLL_WRITE;

		ret = -EFAULT;
		if (mmget_not_zero(mm)) {
			down_read(&mm->mmap_sem);
			while (pinned < npages) {
				ret = get_user_pages_remote
					(work->task, mm,
					 obj->userptr.ptr + pinned * PAGE_SIZE,
					 npages - pinned,
					 flags,
					 pvec + pinned, NULL, NULL);
				if (ret < 0)
					break;

				pinned += ret;
			}
			up_read(&mm->mmap_sem);
			mmput(mm);
		}
	}

	mutex_lock(&obj->mm.lock);
	if (obj->userptr.work == &work->work) {
		struct sg_table *pages = ERR_PTR(ret);

		if (pinned == npages) {
			pages = __i915_gem_userptr_alloc_pages(obj, pvec,
							       npages);
			if (!IS_ERR(pages)) {
				pinned = 0;
				pages = NULL;
			}
		}

		obj->userptr.work = ERR_CAST(pages);
		if (IS_ERR(pages))
			__i915_gem_userptr_set_active(obj, false);
	}
	mutex_unlock(&obj->mm.lock);

	release_pages(pvec, pinned);
	kvfree(pvec);

	i915_gem_object_put(obj);
	put_task_struct(work->task);
	kfree(work);
}

static struct sg_table *
__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj)
{
	struct get_pages_work *work;

	/* Spawn a worker so that we can acquire the
	 * user pages without holding our mutex. Access
	 * to the user pages requires mmap_sem, and we have
	 * a strict lock ordering of mmap_sem, struct_mutex -
	 * we already hold struct_mutex here and so cannot
	 * call gup without encountering a lock inversion.
	 *
	 * Userspace will keep on repeating the operation
	 * (thanks to EAGAIN) until either we hit the fast
	 * path or the worker completes. If the worker is
	 * cancelled or superseded, the task is still run
	 * but the results ignored. (This leads to
	 * complications that we may have a stray object
	 * refcount that we need to be wary of when
	 * checking for existing objects during creation.)
	 * If the worker encounters an error, it reports
	 * that error back to this function through
	 * obj->userptr.work = ERR_PTR.
	 */
	work = kmalloc(sizeof(*work), GFP_KERNEL);
	if (work == NULL)
		return ERR_PTR(-ENOMEM);

	obj->userptr.work = &work->work;

	work->obj = i915_gem_object_get(obj);

	work->task = current;
	get_task_struct(work->task);

	INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker);
	queue_work(to_i915(obj->base.dev)->mm.userptr_wq, &work->work);

	return ERR_PTR(-EAGAIN);
}

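/*
 * get_pages for a userptr object: try the lockless fast path
 * (__get_user_pages_fast) when called from the creating process; if that
 * cannot pin everything, defer to the worker and report -EAGAIN so the
 * caller retries once the pages have been acquired.
 */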
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
	const int num_pages = obj->base.size >> PAGE_SHIFT;
	struct mm_struct *mm = obj->userptr.mm->mm;
	struct page **pvec;
	struct sg_table *pages;
	bool active;
	int pinned;

	/* If userspace should engineer that these pages are replaced in
	 * the vma between us binding this page into the GTT and completion
	 * of rendering... Their loss. If they change the mapping of their
	 * pages they need to create a new bo to point to the new vma.
	 *
	 * However, that still leaves open the possibility of the vma
	 * being copied upon fork. Which falls under the same userspace
	 * synchronisation issue as a regular bo, except that this time
	 * the process may not be expecting that a particular piece of
	 * memory is tied to the GPU.
	 *
	 * Fortunately, we can hook into the mmu_notifier in order to
	 * discard the page references prior to anything nasty happening
	 * to the vma (discard or cloning) which should prevent the more
	 * egregious cases from causing harm.
	 */

	if (obj->userptr.work) {
		/* active flag should still be held for the pending work */
		if (IS_ERR(obj->userptr.work))
			return PTR_ERR(obj->userptr.work);
		else
			return -EAGAIN;
	}

	pvec = NULL;
	pinned = 0;

	if (mm == current->mm) {
		pvec = kvmalloc_array(num_pages, sizeof(struct page *),
				      GFP_KERNEL |
				      __GFP_NORETRY |
				      __GFP_NOWARN);
		if (pvec) /* defer to worker if malloc fails */
			pinned = __get_user_pages_fast(obj->userptr.ptr,
						       num_pages,
						       !i915_gem_object_is_readonly(obj),
						       pvec);
	}

	active = false;
	if (pinned < 0) {
		pages = ERR_PTR(pinned);
		pinned = 0;
	} else if (pinned < num_pages) {
		pages = __i915_gem_userptr_get_pages_schedule(obj);
		active = pages == ERR_PTR(-EAGAIN);
	} else {
		pages = __i915_gem_userptr_alloc_pages(obj, pvec, num_pages);
		active = !IS_ERR(pages);
	}
	if (active)
		__i915_gem_userptr_set_active(obj, true);

	if (IS_ERR(pages))
		release_pages(pvec, pinned);
	kvfree(pvec);

	return PTR_ERR_OR_ZERO(pages);
}

static void
i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	struct sgt_iter sgt_iter;
	struct page *page;

	/* Cancel any inflight work and force them to restart their gup */
	obj->userptr.work = NULL;
	__i915_gem_userptr_set_active(obj, false);
	if (!pages)
		return;

	__i915_gem_object_release_shmem(obj, pages, true);
	i915_gem_gtt_finish_pages(obj, pages);

	/*
	 * We always mark objects as dirty when they are used by the GPU,
	 * just in case. However, if we set the vma as being read-only we know
	 * that the object will never have been written to.
	 */
	if (i915_gem_object_is_readonly(obj))
		obj->mm.dirty = false;

	for_each_sgt_page(page, sgt_iter, pages) {
		if (obj->mm.dirty && trylock_page(page)) {
			/*
			 * As this may not be anonymous memory (e.g. shmem)
			 * but exist on a real mapping, we have to lock
			 * the page in order to dirty it -- holding
			 * the page reference is not sufficient to
			 * prevent the inode from being truncated.
			 * Play safe and take the lock.
			 *
			 * However...!
			 *
			 * The mmu-notifier can be invalidated for a
			 * migrate_page, that is already holding the lock
			 * on the page. Such a try_to_unmap() will result
			 * in us calling put_pages() and so recursively try
			 * to lock the page. We avoid that deadlock with
			 * a trylock_page() and in exchange we risk missing
			 * some page dirtying.
			 */
			set_page_dirty(page);
			unlock_page(page);
		}

		mark_page_accessed(page);
		put_page(page);
	}
	obj->mm.dirty = false;

	sg_free_table(pages);
	kfree(pages);
}

static void
i915_gem_userptr_release(struct drm_i915_gem_object *obj)
{
	i915_gem_userptr_release__mmu_notifier(obj);
	i915_gem_userptr_release__mm_struct(obj);
}

static int
i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
{
	if (obj->userptr.mmu_object)
		return 0;

	return i915_gem_userptr_init__mmu_notifier(obj, 0);
}

static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
		 I915_GEM_OBJECT_IS_SHRINKABLE |
		 I915_GEM_OBJECT_NO_GGTT |
		 I915_GEM_OBJECT_ASYNC_CANCEL,
	.get_pages = i915_gem_userptr_get_pages,
	.put_pages = i915_gem_userptr_put_pages,
	.dmabuf_export = i915_gem_userptr_dmabuf_export,
	.release = i915_gem_userptr_release,
};

/*
 * Creates a new mm object that wraps some normal memory from the process
 * context - user memory.
 *
 * We impose several restrictions upon the memory being mapped
 * into the GPU.
 * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
 * 2. It must be normal system memory, not a pointer into another map of IO
 *    space (e.g. it must not be a GTT mmapping of another object).
 * 3. We only allow a bo as large as we could in theory map into the GTT,
 *    that is we limit the size to the total size of the GTT.
 * 4. The bo is marked as being snoopable. The backing pages are left
 *    accessible directly by the CPU, but reads and writes by the GPU may
 *    incur the cost of a snoop (unless you have an LLC architecture).
 *
 * Synchronisation between multiple users and the GPU is left to userspace
 * through the normal set-domain-ioctl. The kernel will enforce that the
 * GPU relinquishes the VMA before it is returned to the system,
 * i.e. upon free(), munmap() or process termination. However, the userspace
 * malloc() library may not immediately relinquish the VMA after free() and
 * instead reuse it whilst the GPU is still reading and writing to the VMA.
 * Caveat emptor.
 *
 * Also note that the object created here is not currently a "first class"
 * object, in that several ioctls are banned. These are the CPU access
 * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
 * direct access via your pointer rather than use those ioctls. Another
 * restriction is that we do not allow userptr surfaces to be pinned to the
 * hardware and so we reject any attempt to create a framebuffer out of a
 * userptr.
 *
 * If you think this is a good interface to use to pass GPU memory between
 * drivers, please use dma-buf instead. In fact, wherever possible use
 * dma-buf instead.
 *
 * An illustrative userspace invocation is sketched after this function.
 */
int
i915_gem_userptr_ioctl(struct drm_device *dev,
		       void *data,
		       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_userptr *args = data;
	struct drm_i915_gem_object *obj;
	int ret;
	u32 handle;

	if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
		/* We cannot support coherent userptr objects on hw without
		 * LLC and with broken snooping.
		 */
		return -ENODEV;
	}

	if (args->flags & ~(I915_USERPTR_READ_ONLY |
			    I915_USERPTR_UNSYNCHRONIZED))
		return -EINVAL;

	if (!args->user_size)
		return -EINVAL;

	if (offset_in_page(args->user_ptr | args->user_size))
		return -EINVAL;

	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
		return -EFAULT;

	if (args->flags & I915_USERPTR_READ_ONLY) {
		struct i915_address_space *vm;

		/*
		 * On almost all of the older hw, we cannot tell the GPU that
		 * a page is readonly.
		 */
		vm = dev_priv->kernel_context->vm;
		if (!vm || !vm->has_read_only)
			return -ENODEV;
	}

	obj = i915_gem_object_alloc();
	if (obj == NULL)
		return -ENOMEM;

	drm_gem_private_object_init(dev, &obj->base, args->user_size);
	i915_gem_object_init(obj, &i915_gem_userptr_ops);
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->write_domain = I915_GEM_DOMAIN_CPU;
	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	obj->userptr.ptr = args->user_ptr;
	if (args->flags & I915_USERPTR_READ_ONLY)
		i915_gem_object_set_readonly(obj);

	/* And keep a pointer to the current->mm for resolving the user pages
	 * at binding. This means that we need to hook into the mmu_notifier
	 * in order to detect if the mmu is destroyed.
	 */
	ret = i915_gem_userptr_init__mm_struct(obj);
	if (ret == 0)
		ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
	if (ret == 0)
		ret = drm_gem_handle_create(file, &obj->base, &handle);

	/* drop reference from allocate - handle holds it now */
	i915_gem_object_put(obj);
	if (ret)
		return ret;

	args->handle = handle;
	return 0;
}

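/*
 * For reference, a minimal userspace sketch of creating a userptr object
 * with the ioctl above. It is illustrative only and assumes the uapi
 * definitions from <drm/i915_drm.h> plus libdrm's drmIoctl() wrapper; it
 * is not part of this file, and the buffer is assumed to be page aligned
 * with a size that is a multiple of the page size.
 *
 *	#include <stdint.h>
 *	#include <xf86drm.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int create_userptr(int fd, void *ptr, uint64_t size,
 *				  uint32_t *handle)
 *	{
 *		struct drm_i915_gem_userptr arg = {
 *			.user_ptr = (uintptr_t)ptr,
 *			.user_size = size,
 *			.flags = 0,	// or I915_USERPTR_READ_ONLY
 *		};
 *		int err = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg);
 *
 *		if (err)
 *			return err;
 *		*handle = arg.handle;	// GEM handle backed by user memory
 *		return 0;
 *	}
 */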
int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
{
	mutex_init(&dev_priv->mm_lock);
	hash_init(dev_priv->mm_structs);

	dev_priv->mm.userptr_wq =
		alloc_workqueue("i915-userptr-acquire",
				WQ_HIGHPRI | WQ_UNBOUND,
				0);
	if (!dev_priv->mm.userptr_wq)
		return -ENOMEM;

	return 0;
}

void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
{
	destroy_workqueue(dev_priv->mm.userptr_wq);
}