/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
#include "i915_vma.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}
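
/*
 * Illustrative sketch (not from the original driver): the set-to-domain
 * helpers above run under assert_object_held(), so a hypothetical caller
 * outside the set-domain ioctl path takes the object lock around them,
 * roughly as follows. The function name is invented for the example.
 */
static int __maybe_unused example_move_to_wc(struct drm_i915_gem_object *obj,
					     bool write)
{
	int err;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		return err;

	err = i915_gem_object_set_to_wc_domain(obj, write);

	i915_gem_object_unlock(obj);
	return err;
}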

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}
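
/*
 * Illustrative userspace sketch (not part of this file): querying the
 * caching mode reported by the ioctl above. Assumes an open DRM fd, an
 * existing GEM handle, libdrm's drmIoctl() (xf86drm.h) and the uapi
 * definitions from drm/i915_drm.h.
 *
 *	struct drm_i915_gem_caching get = { .handle = handle };
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &get) == 0)
 *		printf("caching: %u\n", get.caching);
 *
 * get.caching is then one of I915_CACHING_NONE, I915_CACHING_CACHED or
 * I915_CACHING_DISPLAY, as mapped from obj->cache_level above.
 */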

int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator, and
	 * not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		ret = -ENXIO;
		goto out;
	}

	ret = i915_gem_object_set_cache_level(obj, level);

out:
	i915_gem_object_put(obj);
	return ret;
}
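
/*
 * Illustrative userspace sketch (not part of this file): requesting a
 * caching mode through the ioctl above, with fd/handle and libdrm's
 * drmIoctl() assumed as in the previous example.
 *
 *	struct drm_i915_gem_caching set = {
 *		.handle = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &set))
 *		fprintf(stderr, "SET_CACHING: %s\n", strerror(errno));
 *
 * A failure surfaces the returns above via errno: ENODEV when neither an
 * LLC nor snooping is available, ENXIO for a proxy object, EINVAL for an
 * unknown caching value.
 */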

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for the frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     const struct i915_ggtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM (no migration yet) */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
					       PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);

	return vma;
}
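
/*
 * Illustrative sketch (not from the original driver): a display-facing
 * caller pins the framebuffer object for scanout and later releases it
 * with i915_gem_object_unpin_from_display_plane(), defined below. The
 * function name, the NULL view and the zero flags are assumptions made
 * for the example.
 */
static int __maybe_unused example_pin_scanout(struct drm_i915_gem_object *obj,
					      u32 alignment)
{
	struct i915_vma *vma;

	vma = i915_gem_object_pin_to_display_plane(obj, alignment,
						   NULL /* normal GGTT view */,
						   0 /* no extra PIN_* flags */);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... program the plane to scan out from vma ... */

	i915_gem_object_unpin_from_display_plane(vma);
	return 0;
}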

static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;

	if (list_empty(&obj->vma.list))
		return;

	mutex_lock(&i915->ggtt.vm.mutex);
	spin_lock(&obj->vma.lock);
	for_each_ggtt_vma(vma, obj) {
		if (!drm_mm_node_allocated(&vma->node))
			continue;

		GEM_BUG_ON(vma->vm != &i915->ggtt.vm);
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	}
	spin_unlock(&obj->vma.lock);
	mutex_unlock(&i915->ggtt.vm.mutex);

	if (i915_gem_object_is_shrinkable(obj)) {
		unsigned long flags;

		spin_lock_irqsave(&i915->mm.obj_lock, flags);

		if (obj->mm.madv == I915_MADV_WILLNEED &&
		    !atomic_read(&obj->mm.shrink_pin))
			list_move_tail(&obj->mm.link, &i915->mm.shrink_list);

		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
	}
}

void
i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
{
	/* Bump the LRU to try and avoid premature eviction whilst flipping */
	i915_gem_object_bump_inactive_ggtt(vma->obj);

	i915_vma_unpin(vma);
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains) {
		err = 0;
		goto out;
	}

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	if (err)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

	/* And bump the LRU for this access */
	i915_gem_object_bump_inactive_ggtt(obj);

	i915_gem_object_unlock(obj);

	if (write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out_unpin:
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
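
/*
 * Illustrative userspace sketch (not part of this file): before touching
 * an mmap of the object with the CPU, userspace moves it into the CPU
 * domain through the ioctl above (fd/handle and libdrm's drmIoctl()
 * assumed):
 *
 *	struct drm_i915_gem_set_domain arg = {
 *		.handle = handle,
 *		.read_domains = I915_GEM_DOMAIN_CPU,
 *		.write_domain = I915_GEM_DOMAIN_CPU,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
 *
 * For write-combining or GTT access the same call is made with
 * I915_GEM_DOMAIN_WC or I915_GEM_DOMAIN_GTT; a non-zero write_domain must
 * equal read_domains, as enforced at the top of the ioctl.
 */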

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache. On success the object is returned
 * with its pages pinned and the object lock held; the caller is
 * responsible for releasing both.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

/*
 * As i915_gem_object_prepare_read(), but synchronizes against all GPU
 * access and prepares the object for a CPU write: needs_clflush may also
 * request CLFLUSH_AFTER so that the freshly written cachelines are flushed
 * out for any subsequent GPU access. Also returns with the pages pinned
 * and the object lock held.
 */
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
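
/*
 * Illustrative sketch (not from the original driver): a pwrite-style
 * caller of i915_gem_object_prepare_write(). It flushes cachelines around
 * the copy as requested by needs_clflush, then drops the page pin and the
 * object lock that the prepare helper left held. The function name and
 * the caller-supplied CPU mapping ('vaddr') are assumptions; the read
 * helper above follows the same pattern with only CLFLUSH_BEFORE.
 */
static int __maybe_unused
example_cpu_write(struct drm_i915_gem_object *obj, void *vaddr,
		  const void *src, unsigned long len)
{
	unsigned int needs_clflush;
	int err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		return err;

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(vaddr, len);

	memcpy(vaddr, src, len);

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(vaddr, len);

	i915_gem_object_unpin_pages(obj);
	i915_gem_object_unlock(obj);
	return 0;
}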