/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"

#include "i915_drv.h"
#include "i915_gem_clflush.h"
#include "i915_gem_domain.h"
#include "i915_gem_gtt.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
#include "i915_gem_object.h"
#include "i915_vma.h"

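/*
 * Decide whether a GPU (render) write can leave stale data in the CPU cache:
 * discrete parts never need a flush, while integrated parts only do when the
 * object uses a cacheable cache level rather than uncached or write-through.
 * flush_write_domain() uses this to mark the object's cache as dirty.
 */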
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (IS_DGFX(i915))
		return false;

	return !(obj->cache_level == I915_CACHE_NONE ||
		 obj->cache_level == I915_CACHE_WT);
}

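/*
 * Report whether a CPU write to this object must be followed by a clflush:
 * not needed when the cache is already tracked as dirty or on discrete
 * parts, needed when the object is not coherent for CPU writes, and kept
 * for framebuffers so the display engine always sees the latest data.
 */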
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);

	if (obj->cache_dirty)
		return false;

	if (IS_DGFX(i915))
		return false;

	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
		return true;

	/* Currently in use by HW (display engine)? Keep flushed. */
	return i915_gem_object_is_framebuffer(obj);
}

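/*
 * Flush any write tracked in obj->write_domain before the object is accessed
 * through another domain: drain GGTT write barriers for the GTT domain, issue
 * a write barrier for WC, clflush for the CPU domain, and mark the cache
 * dirty after a GPU write that bypassed it.
 */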
static void
flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
{
	struct i915_vma *vma;

	assert_object_held(obj);

	if (!(obj->write_domain & flush_domains))
		return;

	switch (obj->write_domain) {
	case I915_GEM_DOMAIN_GTT:
		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj) {
			if (i915_vma_unset_ggtt_write(vma))
				intel_gt_flush_ggtt_writes(vma->vm->gt);
		}
		spin_unlock(&obj->vma.lock);

		i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
		break;

	case I915_GEM_DOMAIN_WC:
		wmb();
		break;

	case I915_GEM_DOMAIN_CPU:
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		break;

	case I915_GEM_DOMAIN_RENDER:
		if (gpu_write_needs_clflush(obj))
			obj->cache_dirty = true;
		break;
	}

	obj->write_domain = 0;
}

static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
{
	/*
	 * We manually flush the CPU domain so that we can override and
	 * force the flush for the display, and perform it asynchronously.
	 */
	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
	if (obj->cache_dirty)
		i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
	obj->write_domain = 0;
}

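/*
 * If the object is currently used as a framebuffer, flush its writes so the
 * display engine (which may not snoop the CPU caches) sees them; the _locked
 * variant below expects the caller to already hold the object lock.
 */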
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
{
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_WC)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * WC domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_WC;
	if (write) {
		obj->read_domains = I915_GEM_DOMAIN_WC;
		obj->write_domain = I915_GEM_DOMAIN_WC;
		obj->mm.dirty = true;
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
 * @write: ask for write access or read only
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	/* Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

	/* Serialise direct access to this object with the barriers for
	 * coherent writes from the GPU, by effectively invalidating the
	 * GTT domain upon first access.
	 */
	if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
		mb();

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		struct i915_vma *vma;

		obj->read_domains = I915_GEM_DOMAIN_GTT;
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj->mm.dirty = true;

		spin_lock(&obj->vma.lock);
		for_each_ggtt_vma(vma, obj)
			if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
				i915_vma_set_ggtt_write(vma);
		spin_unlock(&obj->vma.lock);
	}

	i915_gem_object_unpin_pages(obj);
	return 0;
}

/**
 * Changes the cache-level of an object across all VMA.
 * @obj: object to act on
 * @cache_level: new cache level to set for the object
 *
 * After this function returns, the object will be in the new cache-level
 * across all GTT and the contents of the backing storage will be coherent,
 * with respect to the new cache-level. In order to keep the backing storage
 * coherent for all users, we only allow a single cache level to be set
 * globally on the object and prevent it from being changed whilst the
 * hardware is reading from the object. That is, if the object is currently
 * on the scanout it will be set to uncached (or equivalent display
 * cache coherency) and all non-MOCS GPU access will also be uncached so
 * that all direct access to the scanout remains coherent.
 */
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
				      I915_GEM_OBJECT_UNBIND_BARRIER);
}

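/*
 * DRM_IOCTL_I915_GEM_GET_CACHING: report the object's caching mode to
 * userspace, collapsing the internal cache levels to CACHED, DISPLAY or
 * NONE. Not supported on discrete parts.
 */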
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	int err = 0;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj) {
		err = -ENOENT;
		goto out;
	}

	switch (obj->cache_level) {
	case I915_CACHE_LLC:
	case I915_CACHE_L3_LLC:
		args->caching = I915_CACHING_CACHED;
		break;

	case I915_CACHE_WT:
		args->caching = I915_CACHING_DISPLAY;
		break;

	default:
		args->caching = I915_CACHING_NONE;
		break;
	}
out:
	rcu_read_unlock();
	return err;
}

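/*
 * DRM_IOCTL_I915_GEM_SET_CACHING: translate the uapi caching mode requested
 * by userspace into an internal cache level and apply it with
 * i915_gem_object_set_cache_level(). Not supported on discrete parts, and
 * rejected for proxy objects whose caching is owned by their creator.
 */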
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *i915 = to_i915(dev);
	struct drm_i915_gem_caching *args = data;
	struct drm_i915_gem_object *obj;
	enum i915_cache_level level;
	int ret = 0;

	if (IS_DGFX(i915))
		return -ENODEV;

	switch (args->caching) {
	case I915_CACHING_NONE:
		level = I915_CACHE_NONE;
		break;
	case I915_CACHING_CACHED:
		/*
		 * Due to a HW issue on BXT A stepping, GPU stores via a
		 * snooped mapping may leave stale data in a corresponding CPU
		 * cacheline, whereas normally such cachelines would get
		 * invalidated.
		 */
		if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
			return -ENODEV;

		level = I915_CACHE_LLC;
		break;
	case I915_CACHING_DISPLAY:
		level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
		break;
	default:
		return -EINVAL;
	}

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * The caching mode of a proxy object is handled by its generator and
	 * is not allowed to be changed by userspace.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		/*
		 * Silently allow cached for userptr; the Vulkan driver
		 * sets all objects to cached.
		 */
		if (!i915_gem_object_is_userptr(obj) ||
		    args->caching != I915_CACHING_CACHED)
			ret = -ENXIO;

		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
	return ret;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
 * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
 * (for pageflips). We only flush the caches while preparing the buffer for
 * display; the callers are responsible for frontbuffer flush.
 */
struct i915_vma *
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     struct i915_gem_ww_ctx *ww,
				     u32 alignment,
				     const struct i915_gtt_view *view,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_vma *vma;
	int ret;

	/* Frame buffer must be in LMEM */
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is the lowest common denominator for
	 * all chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);

	/*
	 * As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers. However,
	 * it may simply be too big to fit into mappable, in which case
	 * put it anyway and hope that userspace can cope (but always first
	 * try to preserve the existing ABI).
	 */
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma))
		return vma;

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
	i915_vma_mark_scanout(vma);

	i915_gem_object_flush_if_display_locked(obj);

	return vma;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 * @obj: object to act on
 * @write: requesting write or read-only access
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	int ret;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   (write ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write)
		__start_cpu_write(obj);

	return 0;
}

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file)
{
	struct drm_i915_gem_set_domain *args = data;
	struct drm_i915_gem_object *obj;
	u32 read_domains = args->read_domains;
	u32 write_domain = args->write_domain;
	int err;

	if (IS_DGFX(to_i915(dev)))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/*
	 * Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain && read_domains != write_domain)
		return -EINVAL;

	if (!read_domains)
		return 0;

	obj = i915_gem_object_lookup(file, args->handle);
	if (!obj)
		return -ENOENT;

	/*
	 * Try to flush the object off the GPU without holding the lock.
	 * We will repeat the flush holding the lock in the normal manner
	 * to catch cases where we are gazumped.
	 */
	err = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   (write_domain ? I915_WAIT_ALL : 0),
				   MAX_SCHEDULE_TIMEOUT);
	if (err)
		goto out;

	if (i915_gem_object_is_userptr(obj)) {
		/*
		 * Try to grab userptr pages; iris uses set_domain to check
		 * userptr validity.
		 */
		err = i915_gem_object_userptr_validate(obj);
		if (!err)
			err = i915_gem_object_wait(obj,
						   I915_WAIT_INTERRUPTIBLE |
						   I915_WAIT_PRIORITY |
						   (write_domain ? I915_WAIT_ALL : 0),
						   MAX_SCHEDULE_TIMEOUT);
		goto out;
	}

	/*
	 * Proxy objects do not control access to the backing storage, ergo
	 * they cannot be used as a means to manipulate the cache domain
	 * tracking for that backing storage. The proxy object is always
	 * considered to be outside of any cache domain.
	 */
	if (i915_gem_object_is_proxy(obj)) {
		err = -ENXIO;
		goto out;
	}

	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

	/*
	 * Flush and acquire obj->pages so that we are coherent through
	 * direct access in memory with previous cached writes through
	 * shmemfs and that our cache domain tracking remains valid.
	 * For example, if the obj->filp was moved to swap without us
	 * being notified and releasing the pages, we would mistakenly
	 * continue to assume that the obj remained out of the CPU cached
	 * domain.
	 */
	err = i915_gem_object_pin_pages(obj);
	if (err)
		goto out_unlock;

	/*
	 * Already in the desired write domain? Nothing for us to do!
	 *
	 * We apply a little bit of cunning here to catch a broader set of
	 * no-ops. If obj->write_domain is set, we must be in the same
	 * obj->read_domains, and only that domain. Therefore, if that
	 * obj->write_domain matches the request read_domains, we are
	 * already in the same read/write domain and can skip the operation,
	 * without having to further check the requested write_domain.
	 */
	if (READ_ONCE(obj->write_domain) == read_domains)
		goto out_unpin;

	if (read_domains & I915_GEM_DOMAIN_WC)
		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
	else if (read_domains & I915_GEM_DOMAIN_GTT)
		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
	else
		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

out_unpin:
	i915_gem_object_unpin_pages(obj);

out_unlock:
	i915_gem_object_unlock(obj);

	if (!err && write_domain)
		i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);

out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Pins the specified object's pages and synchronizes the object with
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
 * flush the object from the CPU cache.
 */
int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
				 unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, false);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu read domain, set ourself into the gtt
	 * read domain and manually flush cachelines (if required). This
	 * optimizes for the case when the gpu will dirty the data
	 * anyway again before the next pread happens.
	 */
	if (!obj->cache_dirty &&
	    !(obj->read_domains & I915_GEM_DOMAIN_CPU))
		*needs_clflush = CLFLUSH_BEFORE;

out:
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}

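/*
 * Write-side counterpart of i915_gem_object_prepare_read(): pins the pages,
 * waits for all GPU access and reports via needs_clflush whether the caller
 * must flush cachelines before the CPU write (to invalidate stale, partially
 * written lines) and/or after it (so the GPU sees the new data). Returns
 * with the pages pinned on success.
 */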
int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
				  unsigned int *needs_clflush)
{
	int ret;

	*needs_clflush = 0;
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
		ret = i915_gem_object_set_to_cpu_domain(obj, true);
		if (ret)
			goto err_unpin;
		else
			goto out;
	}

	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

	/* If we're not in the cpu write domain, set ourself into the
	 * gtt write domain and manually flush cachelines (as required).
	 * This optimizes for the case when the gpu will use the data
	 * right away and we therefore have to clflush anyway.
	 */
	if (!obj->cache_dirty) {
		*needs_clflush |= CLFLUSH_AFTER;

		/*
		 * Same trick applies to invalidate partially written
		 * cachelines read before writing.
		 */
		if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
			*needs_clflush |= CLFLUSH_BEFORE;
	}

out:
	i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
	obj->mm.dirty = true;
	/* return with the pages pinned */
	return 0;

err_unpin:
	i915_gem_object_unpin_pages(obj);
	return ret;
}