i915_gem_execbuffer.c - drivers/gpu/drm/i915/i915_gem_execbuffer.c - Linux diff v3.1

   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include "drmP.h"
  30#include "drm.h"
  31#include "i915_drm.h"
 
 
 
 
 
 
  32#include "i915_drv.h"
 
  33#include "i915_trace.h"
  34#include "intel_drv.h"
 
  35
  36struct change_domains {
  37	uint32_t invalidate_domains;
  38	uint32_t flush_domains;
  39	uint32_t flush_rings;
  40	uint32_t flips;
  41};
  42
  43/*
  44 * Set the next domain for the specified object. This
   45 * may not actually perform the necessary flushing/invalidating though,
  46 * as that may want to be batched with other set_domain operations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  47 *
  48 * This is (we hope) the only really tricky part of gem. The goal
  49 * is fairly simple -- track which caches hold bits of the object
  50 * and make sure they remain coherent. A few concrete examples may
  51 * help to explain how it works. For shorthand, we use the notation
   52 * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
   53 * a pair of read and write domain masks.
 
 
 
 
 
  54 *
  55 * Case 1: the batch buffer
  56 *
  57 *	1. Allocated
  58 *	2. Written by CPU
  59 *	3. Mapped to GTT
  60 *	4. Read by GPU
  61 *	5. Unmapped from GTT
  62 *	6. Freed
  63 *
  64 *	Let's take these a step at a time
 
 
 
 
 
 
 
 
 
 
 
 
  65 *
  66 *	1. Allocated
  67 *		Pages allocated from the kernel may still have
  68 *		cache contents, so we set them to (CPU, CPU) always.
  69 *	2. Written by CPU (using pwrite)
  70 *		The pwrite function calls set_domain (CPU, CPU) and
  71 *		this function does nothing (as nothing changes)
  72 *	3. Mapped by GTT
  73 *		This function asserts that the object is not
  74 *		currently in any GPU-based read or write domains
  75 *	4. Read by GPU
  76 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
  77 *		As write_domain is zero, this function adds in the
  78 *		current read domains (CPU+COMMAND, 0).
  79 *		flush_domains is set to CPU.
  80 *		invalidate_domains is set to COMMAND
  81 *		clflush is run to get data out of the CPU caches
  82 *		then i915_dev_set_domain calls i915_gem_flush to
  83 *		emit an MI_FLUSH and drm_agp_chipset_flush
  84 *	5. Unmapped from GTT
  85 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
  86 *		flush_domains and invalidate_domains end up both zero
  87 *		so no flushing/invalidating happens
  88 *	6. Freed
  89 *		yay, done
  90 *
  91 * Case 2: The shared render buffer
 
  92 *
  93 *	1. Allocated
  94 *	2. Mapped to GTT
  95 *	3. Read/written by GPU
  96 *	4. set_domain to (CPU,CPU)
  97 *	5. Read/written by CPU
  98 *	6. Read/written by GPU
  99 *
 100 *	1. Allocated
 101 *		Same as last example, (CPU, CPU)
 102 *	2. Mapped to GTT
 103 *		Nothing changes (assertions find that it is not in the GPU)
 104 *	3. Read/written by GPU
 105 *		execbuffer calls set_domain (RENDER, RENDER)
 106 *		flush_domains gets CPU
 107 *		invalidate_domains gets GPU
 108 *		clflush (obj)
 109 *		MI_FLUSH and drm_agp_chipset_flush
 110 *	4. set_domain (CPU, CPU)
 111 *		flush_domains gets GPU
 112 *		invalidate_domains gets CPU
 113 *		wait_rendering (obj) to make sure all drawing is complete.
 114 *		This will include an MI_FLUSH to get the data from GPU
 115 *		to memory
 116 *		clflush (obj) to invalidate the CPU cache
 117 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 118 *	5. Read/written by CPU
 119 *		cache lines are loaded and dirtied
  120 *	6. Read/written by GPU
 121 *		Same as last GPU access
 122 *
 123 * Case 3: The constant buffer
 
 
 
 
 124 *
 125 *	1. Allocated
 126 *	2. Written by CPU
 127 *	3. Read by GPU
 128 *	4. Updated (written) by CPU again
 129 *	5. Read by GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 130 *
 131 *	1. Allocated
 132 *		(CPU, CPU)
 133 *	2. Written by CPU
 134 *		(CPU, CPU)
 135 *	3. Read by GPU
 136 *		(CPU+RENDER, 0)
 137 *		flush_domains = CPU
 138 *		invalidate_domains = RENDER
 139 *		clflush (obj)
 140 *		MI_FLUSH
 141 *		drm_agp_chipset_flush
 142 *	4. Updated (written) by CPU again
 143 *		(CPU, CPU)
 144 *		flush_domains = 0 (no previous write domain)
 145 *		invalidate_domains = 0 (no new read domains)
 146 *	5. Read by GPU
 147 *		(CPU+RENDER, 0)
 148 *		flush_domains = CPU
 149 *		invalidate_domains = RENDER
 150 *		clflush (obj)
 151 *		MI_FLUSH
 152 *		drm_agp_chipset_flush
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 153 */
 154static void
 155i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
 156				  struct intel_ring_buffer *ring,
 157				  struct change_domains *cd)
 158{
 159	uint32_t invalidate_domains = 0, flush_domains = 0;
 160
 161	/*
 162	 * If the object isn't moving to a new write domain,
 163	 * let the object stay in multiple read domains
 164	 */
 165	if (obj->base.pending_write_domain == 0)
 166		obj->base.pending_read_domains |= obj->base.read_domains;
 167
 168	/*
 169	 * Flush the current write domain if
 170	 * the new read domains don't match. Invalidate
 171	 * any read domains which differ from the old
 172	 * write domain
 173	 */
 174	if (obj->base.write_domain &&
 175	    (((obj->base.write_domain != obj->base.pending_read_domains ||
 176	       obj->ring != ring)) ||
 177	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
 178		flush_domains |= obj->base.write_domain;
 179		invalidate_domains |=
 180			obj->base.pending_read_domains & ~obj->base.write_domain;
 181	}
 182	/*
 183	 * Invalidate any read caches which may have
 184	 * stale data. That is, any new read domains.
 
 
 
 
 
 
 
 
 
 
 
 185	 */
 186	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
 187	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
 188		i915_gem_clflush_object(obj);
 189
 190	if (obj->base.pending_write_domain)
 191		cd->flips |= atomic_read(&obj->pending_flip);
 192
 193	/* The actual obj->write_domain will be updated with
 194	 * pending_write_domain after we emit the accumulated flush for all
 195	 * of our domain changes in execbuffers (which clears objects'
 196	 * write_domains).  So if we have a current write domain that we
 197	 * aren't changing, set pending_write_domain to that.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 198	 */
 199	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
 200		obj->base.pending_write_domain = obj->base.write_domain;
 201
 202	cd->invalidate_domains |= invalidate_domains;
 203	cd->flush_domains |= flush_domains;
 204	if (flush_domains & I915_GEM_GPU_DOMAINS)
 205		cd->flush_rings |= obj->ring->id;
 206	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
 207		cd->flush_rings |= ring->id;
 208}
 209
 210struct eb_objects {
 211	int and;
 212	struct hlist_head buckets[0];
 213};
 214
 215static struct eb_objects *
 216eb_create(int size)
 217{
 218	struct eb_objects *eb;
 219	int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
 220	while (count > size)
 221		count >>= 1;
 222	eb = kzalloc(count*sizeof(struct hlist_head) +
 223		     sizeof(struct eb_objects),
 224		     GFP_KERNEL);
 225	if (eb == NULL)
 226		return eb;
 227
 228	eb->and = count - 1;
 229	return eb;
 
 
 
 
 
 
 
 
 
 
 230}
 231
 232static void
 233eb_reset(struct eb_objects *eb)
 234{
 235	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 236}
 237
 238static void
 239eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
 240{
 241	hlist_add_head(&obj->exec_node,
 242		       &eb->buckets[obj->exec_handle & eb->and]);
 243}
 244
 245static struct drm_i915_gem_object *
 246eb_get_object(struct eb_objects *eb, unsigned long handle)
 247{
 248	struct hlist_head *head;
 249	struct hlist_node *node;
 250	struct drm_i915_gem_object *obj;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 251
 252	head = &eb->buckets[handle & eb->and];
 253	hlist_for_each(node, head) {
 254		obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
 255		if (obj->exec_handle == handle)
 256			return obj;
 
 257	}
 258
 259	return NULL;
 260}
 261
 262static void
 263eb_destroy(struct eb_objects *eb)
 
 
 264{
 265	kfree(eb);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 266}
 267
 268static int
 269i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 270				   struct eb_objects *eb,
 271				   struct drm_i915_gem_relocation_entry *reloc)
 272{
 273	struct drm_device *dev = obj->base.dev;
 274	struct drm_gem_object *target_obj;
 275	uint32_t target_offset;
 276	int ret = -EINVAL;
 277
 278	/* we've already hold a reference to all valid objects */
 279	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
 280	if (unlikely(target_obj == NULL))
 281		return -ENOENT;
 282
 283	target_offset = to_intel_bo(target_obj)->gtt_offset;
 
 
 284
 285	/* The target buffer should have appeared before us in the
 286	 * exec_object list, so it should have a GTT space bound by now.
 287	 */
 288	if (unlikely(target_offset == 0)) {
 289		DRM_ERROR("No GTT space found for object %d\n",
 290			  reloc->target_handle);
 291		return ret;
 292	}
 293
 294	/* Validate that the target is in a valid r/w GPU domain */
 295	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 296		DRM_ERROR("reloc with multiple write domains: "
 297			  "obj %p target %d offset %d "
 298			  "read %08x write %08x",
 299			  obj, reloc->target_handle,
 300			  (int) reloc->offset,
 301			  reloc->read_domains,
 302			  reloc->write_domain);
 303		return ret;
 304	}
 305	if (unlikely((reloc->write_domain | reloc->read_domains) & I915_GEM_DOMAIN_CPU)) {
 306		DRM_ERROR("reloc with read/write CPU domains: "
 307			  "obj %p target %d offset %d "
 308			  "read %08x write %08x",
 309			  obj, reloc->target_handle,
 310			  (int) reloc->offset,
 311			  reloc->read_domains,
 312			  reloc->write_domain);
 313		return ret;
 314	}
 315	if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
 316		     reloc->write_domain != target_obj->pending_write_domain)) {
 317		DRM_ERROR("Write domain conflict: "
 318			  "obj %p target %d offset %d "
 319			  "new %08x old %08x\n",
 320			  obj, reloc->target_handle,
 321			  (int) reloc->offset,
 322			  reloc->write_domain,
 323			  target_obj->pending_write_domain);
 324		return ret;
 325	}
 326
 327	target_obj->pending_read_domains |= reloc->read_domains;
 328	target_obj->pending_write_domain |= reloc->write_domain;
 
 329
 330	/* If the relocation already has the right value in it, no
 331	 * more work needs to be done.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 332	 */
 333	if (target_offset == reloc->presumed_offset)
 334		return 0;
 
 335
 336	/* Check that the relocation address is valid... */
 337	if (unlikely(reloc->offset > obj->base.size - 4)) {
 338		DRM_ERROR("Relocation beyond object bounds: "
 339			  "obj %p target %d offset %d size %d.\n",
 340			  obj, reloc->target_handle,
 341			  (int) reloc->offset,
 342			  (int) obj->base.size);
 343		return ret;
 344	}
 345	if (unlikely(reloc->offset & 3)) {
 346		DRM_ERROR("Relocation not 4-byte aligned: "
 347			  "obj %p target %d offset %d.\n",
 348			  obj, reloc->target_handle,
 349			  (int) reloc->offset);
 350		return ret;
 351	}
 352
 353	reloc->delta += target_offset;
 354	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 355		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
 356		char *vaddr;
 
 
 357
 358		vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
 359		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
 360		kunmap_atomic(vaddr);
 361	} else {
 362		struct drm_i915_private *dev_priv = dev->dev_private;
 363		uint32_t __iomem *reloc_entry;
 364		void __iomem *reloc_page;
 365
 366		/* We can't wait for rendering with pagefaults disabled */
 367		if (obj->active && in_atomic())
 368			return -EFAULT;
 369
 370		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
 371		if (ret)
 372			return ret;
 373
 374		/* Map the page containing the relocation we're going to perform.  */
 375		reloc->offset += obj->gtt_offset;
 376		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
 377						      reloc->offset & PAGE_MASK);
 378		reloc_entry = (uint32_t __iomem *)
 379			(reloc_page + (reloc->offset & ~PAGE_MASK));
 380		iowrite32(reloc->delta, reloc_entry);
 381		io_mapping_unmap_atomic(reloc_page);
 382	}
 383
 384	/* and update the user's relocation entry */
 385	reloc->presumed_offset = target_offset;
 386
 387	return 0;
 388}
 389
 390static int
 391i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
 392				    struct eb_objects *eb)
 393{
 394	struct drm_i915_gem_relocation_entry __user *user_relocs;
 395	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 396	int i, ret;
 397
 398	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
 399	for (i = 0; i < entry->relocation_count; i++) {
 400		struct drm_i915_gem_relocation_entry reloc;
 401
 402		if (__copy_from_user_inatomic(&reloc,
 403					      user_relocs+i,
 404					      sizeof(reloc)))
 405			return -EFAULT;
 406
 407		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
 408		if (ret)
 409			return ret;
 410
 411		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
 412					    &reloc.presumed_offset,
 413					    sizeof(reloc.presumed_offset)))
 414			return -EFAULT;
 415	}
 416
 417	return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 418}
 419
 420static int
 421i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 422					 struct eb_objects *eb,
 423					 struct drm_i915_gem_relocation_entry *relocs)
 424{
 425	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 426	int i, ret;
 427
 428	for (i = 0; i < entry->relocation_count; i++) {
 429		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
 430		if (ret)
 431			return ret;
 432	}
 433
 434	return 0;
 
 
 
 
 
 435}
 436
 437static int
 438i915_gem_execbuffer_relocate(struct drm_device *dev,
 439			     struct eb_objects *eb,
 440			     struct list_head *objects)
 441{
 442	struct drm_i915_gem_object *obj;
 443	int ret = 0;
 
 
 444
 445	/* This is the fast path and we cannot handle a pagefault whilst
 446	 * holding the struct mutex lest the user pass in the relocations
 447	 * contained within a mmaped bo. For in such a case we, the page
 448	 * fault handler would call i915_gem_fault() and we would try to
 449	 * acquire the struct mutex again. Obviously this is bad and so
 450	 * lockdep complains vehemently.
 
 451	 */
 452	pagefault_disable();
 453	list_for_each_entry(obj, objects, exec_list) {
 454		ret = i915_gem_execbuffer_relocate_object(obj, eb);
 455		if (ret)
 456			break;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 457	}
 458	pagefault_enable();
 459
 460	return ret;
 
 
 
 461}
 462
 463static int
 464i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 465			    struct drm_file *file,
 466			    struct list_head *objects)
 467{
 468	struct drm_i915_gem_object *obj;
 469	int ret, retry;
 470	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 471	struct list_head ordered_objects;
 472
 473	INIT_LIST_HEAD(&ordered_objects);
 474	while (!list_empty(objects)) {
 475		struct drm_i915_gem_exec_object2 *entry;
 476		bool need_fence, need_mappable;
 477
 478		obj = list_first_entry(objects,
 479				       struct drm_i915_gem_object,
 480				       exec_list);
 481		entry = obj->exec_entry;
 482
 483		need_fence =
 484			has_fenced_gpu_access &&
 485			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 486			obj->tiling_mode != I915_TILING_NONE;
 487		need_mappable =
 488			entry->relocation_count ? true : need_fence;
 489
 490		if (need_mappable)
 491			list_move(&obj->exec_list, &ordered_objects);
 492		else
 493			list_move_tail(&obj->exec_list, &ordered_objects);
 494
 495		obj->base.pending_read_domains = 0;
 496		obj->base.pending_write_domain = 0;
 497	}
 498	list_splice(&ordered_objects, objects);
 499
 500	/* Attempt to pin all of the buffers into the GTT.
 
 501	 * This is done in 3 phases:
 502	 *
 503	 * 1a. Unbind all objects that do not match the GTT constraints for
 504	 *     the execbuffer (fenceable, mappable, alignment etc).
 505	 * 1b. Increment pin count for already bound objects.
 506	 * 2.  Bind new objects.
 507	 * 3.  Decrement pin count.
 508	 *
 509	 * This avoid unnecessary unbinding of later objects in order to makr
 510	 * room for the earlier objects *unless* we need to defragment.
 511	 */
 512	retry = 0;
 
 
 513	do {
 514		ret = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 515
 516		/* Unbind any ill-fitting objects or pin. */
 517		list_for_each_entry(obj, objects, exec_list) {
 518			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 519			bool need_fence, need_mappable;
 520			if (!obj->gtt_space)
 521				continue;
 522
 523			need_fence =
 524				has_fenced_gpu_access &&
 525				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 526				obj->tiling_mode != I915_TILING_NONE;
 527			need_mappable =
 528				entry->relocation_count ? true : need_fence;
 529
 530			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
 531			    (need_mappable && !obj->map_and_fenceable))
 532				ret = i915_gem_object_unbind(obj);
 533			else
 534				ret = i915_gem_object_pin(obj,
 535							  entry->alignment,
 536							  need_mappable);
 537			if (ret)
 538				goto err;
 539
 540			entry++;
 
 
 
 
 
 
 
 
 
 
 
 
 541		}
 
 
 542
 543		/* Bind fresh objects */
 544		list_for_each_entry(obj, objects, exec_list) {
 545			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 546			bool need_fence;
 547
 548			need_fence =
 549				has_fenced_gpu_access &&
 550				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 551				obj->tiling_mode != I915_TILING_NONE;
 552
 553			if (!obj->gtt_space) {
 554				bool need_mappable =
 555					entry->relocation_count ? true : need_fence;
 556
 557				ret = i915_gem_object_pin(obj,
 558							  entry->alignment,
 559							  need_mappable);
 560				if (ret)
 561					break;
 562			}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 563
 564			if (has_fenced_gpu_access) {
 565				if (need_fence) {
 566					ret = i915_gem_object_get_fence(obj, ring);
 567					if (ret)
 568						break;
 569				} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 570					   obj->tiling_mode == I915_TILING_NONE) {
 571					/* XXX pipelined! */
 572					ret = i915_gem_object_put_fence(obj);
 573					if (ret)
 574						break;
 575				}
 576				obj->pending_fenced_gpu_access = need_fence;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 577			}
 578
 579			entry->offset = obj->gtt_offset;
 
 580		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 581
 582		/* Decrement pin count for bound objects */
 583		list_for_each_entry(obj, objects, exec_list) {
 584			if (obj->gtt_space)
 585				i915_gem_object_unpin(obj);
 
 
 586		}
 587
 588		if (ret != -ENOSPC || retry > 1)
 589			return ret;
 590
 591		/* First attempt, just clear anything that is purgeable.
 592		 * Second attempt, clear the entire GTT.
 
 
 
 
 593		 */
 594		ret = i915_gem_evict_everything(ring->dev, retry == 0);
 595		if (ret)
 596			return ret;
 
 
 597
 598		retry++;
 599	} while (1);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 600
 601err:
 602	obj = list_entry(obj->exec_list.prev,
 603			 struct drm_i915_gem_object,
 604			 exec_list);
 605	while (objects != &obj->exec_list) {
 606		if (obj->gtt_space)
 607			i915_gem_object_unpin(obj);
 608
 609		obj = list_entry(obj->exec_list.prev,
 610				 struct drm_i915_gem_object,
 611				 exec_list);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 612	}
 613
 614	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 615}
 616
 617static int
 618i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 619				  struct drm_file *file,
 620				  struct intel_ring_buffer *ring,
 621				  struct list_head *objects,
 622				  struct eb_objects *eb,
 623				  struct drm_i915_gem_exec_object2 *exec,
 624				  int count)
 625{
 626	struct drm_i915_gem_relocation_entry *reloc;
 627	struct drm_i915_gem_object *obj;
 628	int *reloc_offset;
 629	int i, total, ret;
 
 630
 631	/* We may process another execbuffer during the unlock... */
 632	while (!list_empty(objects)) {
 633		obj = list_first_entry(objects,
 634				       struct drm_i915_gem_object,
 635				       exec_list);
 636		list_del_init(&obj->exec_list);
 637		drm_gem_object_unreference(&obj->base);
 638	}
 
 
 
 
 
 639
 640	mutex_unlock(&dev->struct_mutex);
 
 
 
 
 641
 642	total = 0;
 643	for (i = 0; i < count; i++)
 644		total += exec[i].relocation_count;
 645
 646	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
 647	reloc = drm_malloc_ab(total, sizeof(*reloc));
 648	if (reloc == NULL || reloc_offset == NULL) {
 649		drm_free_large(reloc);
 650		drm_free_large(reloc_offset);
 651		mutex_lock(&dev->struct_mutex);
 652		return -ENOMEM;
 
 
 
 
 
 
 653	}
 
 
 
 
 
 
 
 
 654
 655	total = 0;
 656	for (i = 0; i < count; i++) {
 657		struct drm_i915_gem_relocation_entry __user *user_relocs;
 
 
 
 
 
 
 
 
 
 
 
 658
 659		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
 
 660
 661		if (copy_from_user(reloc+total, user_relocs,
 662				   exec[i].relocation_count * sizeof(*reloc))) {
 663			ret = -EFAULT;
 664			mutex_lock(&dev->struct_mutex);
 665			goto err;
 666		}
 667
 668		reloc_offset[i] = total;
 669		total += exec[i].relocation_count;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 670	}
 671
 672	ret = i915_mutex_lock_interruptible(dev);
 673	if (ret) {
 674		mutex_lock(&dev->struct_mutex);
 675		goto err;
 
 
 
 
 676	}
 
 
 
 
 
 
 
 
 
 
 677
 678	/* reacquire the objects */
 679	eb_reset(eb);
 680	for (i = 0; i < count; i++) {
 681		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
 682							exec[i].handle));
 683		if (&obj->base == NULL) {
 684			DRM_ERROR("Invalid object handle %d at index %d\n",
 685				   exec[i].handle, i);
 686			ret = -ENOENT;
 687			goto err;
 688		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 689
 690		list_add_tail(&obj->exec_list, objects);
 691		obj->exec_handle = exec[i].handle;
 692		obj->exec_entry = &exec[i];
 693		eb_add_object(eb, obj);
 
 
 
 694	}
 695
 696	ret = i915_gem_execbuffer_reserve(ring, file, objects);
 697	if (ret)
 
 698		goto err;
 699
 700	list_for_each_entry(obj, objects, exec_list) {
 701		int offset = obj->exec_entry - exec;
 702		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
 703							       reloc + reloc_offset[offset]);
 704		if (ret)
 705			goto err;
 
 
 
 
 
 
 
 
 706	}
 707
 708	/* Leave the user relocations as are, this is the painfully slow path,
 
 709	 * and we want to avoid the complication of dropping the lock whilst
 710	 * having buffers reserved in the aperture and so causing spurious
 711	 * ENOSPC for random operations.
 712	 */
 713
 714err:
 715	drm_free_large(reloc);
 716	drm_free_large(reloc_offset);
 717	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 718}
 719
 720static int
 721i915_gem_execbuffer_flush(struct drm_device *dev,
 722			  uint32_t invalidate_domains,
 723			  uint32_t flush_domains,
 724			  uint32_t flush_rings)
 725{
 726	drm_i915_private_t *dev_priv = dev->dev_private;
 727	int i, ret;
 728
 729	if (flush_domains & I915_GEM_DOMAIN_CPU)
 730		intel_gtt_chipset_flush();
 731
 732	if (flush_domains & I915_GEM_DOMAIN_GTT)
 733		wmb();
 
 734
 735	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
 736		for (i = 0; i < I915_NUM_RINGS; i++)
 737			if (flush_rings & (1 << i)) {
 738				ret = i915_gem_flush_ring(&dev_priv->ring[i],
 739							  invalidate_domains,
 740							  flush_domains);
 741				if (ret)
 742					return ret;
 743			}
 744	}
 745
 746	return 0;
 
 
 
 747}
 748
 749static int
 750i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 751			       struct intel_ring_buffer *to)
 752{
 753	struct intel_ring_buffer *from = obj->ring;
 754	u32 seqno;
 755	int ret, idx;
 756
 757	if (from == NULL || to == from)
 758		return 0;
 
 
 
 
 
 
 
 
 
 
 759
 760	/* XXX gpu semaphores are implicated in various hard hangs on SNB */
 761	if (INTEL_INFO(obj->base.dev)->gen < 6 || !i915_semaphores)
 762		return i915_gem_object_wait_rendering(obj);
 
 
 763
 764	idx = intel_ring_sync_index(from, to);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 765
 766	seqno = obj->last_rendering_seqno;
 767	if (seqno <= from->sync_seqno[idx])
 768		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 769
 770	if (seqno == from->outstanding_lazy_request) {
 771		struct drm_i915_gem_request *request;
 772
 773		request = kzalloc(sizeof(*request), GFP_KERNEL);
 774		if (request == NULL)
 775			return -ENOMEM;
 
 
 776
 777		ret = i915_add_request(from, NULL, request);
 778		if (ret) {
 779			kfree(request);
 780			return ret;
 781		}
 782
 783		seqno = request->seqno;
 
 
 
 
 
 
 
 784	}
 
 
 
 
 785
 786	from->sync_seqno[idx] = seqno;
 787	return intel_ring_sync(to, from, seqno - 1);
 788}
 789
 790static int
 791i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
 792{
 793	u32 plane, flip_mask;
 794	int ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 795
 796	/* Check for any pending flips. As we only maintain a flip queue depth
 797	 * of 1, we can simply insert a WAIT for the next display flip prior
 798	 * to executing the batch and avoid stalling the CPU.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 799	 */
 
 
 
 
 
 800
 801	for (plane = 0; flips >> plane; plane++) {
 802		if (((flips >> plane) & 1) == 0)
 803			continue;
 804
 805		if (plane)
 806			flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
 807		else
 808			flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
 
 
 
 
 
 
 
 
 
 
 
 809
 810		ret = intel_ring_begin(ring, 2);
 811		if (ret)
 812			return ret;
 813
 814		intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
 815		intel_ring_emit(ring, MI_NOOP);
 816		intel_ring_advance(ring);
 817	}
 818
 
 
 
 
 
 
 
 
 
 
 
 
 819	return 0;
 820}
 821
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 822
 823static int
 824i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
 825				struct list_head *objects)
 826{
 827	struct drm_i915_gem_object *obj;
 828	struct change_domains cd;
 829	int ret;
 830
 831	memset(&cd, 0, sizeof(cd));
 832	list_for_each_entry(obj, objects, exec_list)
 833		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
 834
 835	if (cd.invalidate_domains | cd.flush_domains) {
 836		ret = i915_gem_execbuffer_flush(ring->dev,
 837						cd.invalidate_domains,
 838						cd.flush_domains,
 839						cd.flush_rings);
 840		if (ret)
 841			return ret;
 842	}
 843
 844	if (cd.flips) {
 845		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
 846		if (ret)
 847			return ret;
 848	}
 849
 850	list_for_each_entry(obj, objects, exec_list) {
 851		ret = i915_gem_execbuffer_sync_rings(obj, ring);
 852		if (ret)
 853			return ret;
 854	}
 855
 
 
 
 
 
 
 
 
 856	return 0;
 857}
 858
 859static bool
 860i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 
 
 
 
 
 861{
 862	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
 
 
 
 
 
 
 
 863}
 864
 865static int
 866validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
 867		   int count)
 868{
 869	int i;
 870
 871	for (i = 0; i < count; i++) {
 872		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
 873		int length; /* limited by fault_in_pages_readable() */
 
 
 
 
 874
 875		/* First check for malicious input causing overflow */
 876		if (exec[i].relocation_count >
 877		    INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
 878			return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 879
 880		length = exec[i].relocation_count *
 881			sizeof(struct drm_i915_gem_relocation_entry);
 882		if (!access_ok(VERIFY_READ, ptr, length))
 883			return -EFAULT;
 884
 885		/* we may also need to update the presumed offsets */
 886		if (!access_ok(VERIFY_WRITE, ptr, length))
 887			return -EFAULT;
 888
 889		if (fault_in_pages_readable(ptr, length))
 890			return -EFAULT;
 
 891	}
 892
 893	return 0;
 894}
 895
 896static void
 897i915_gem_execbuffer_move_to_active(struct list_head *objects,
 898				   struct intel_ring_buffer *ring,
 899				   u32 seqno)
 900{
 901	struct drm_i915_gem_object *obj;
 902
 903	list_for_each_entry(obj, objects, exec_list) {
 904		  u32 old_read = obj->base.read_domains;
 905		  u32 old_write = obj->base.write_domain;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 906
 
 
 
 
 907
 908		obj->base.read_domains = obj->base.pending_read_domains;
 909		obj->base.write_domain = obj->base.pending_write_domain;
 910		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
 911
 912		i915_gem_object_move_to_active(obj, ring, seqno);
 913		if (obj->base.write_domain) {
 914			obj->dirty = 1;
 915			obj->pending_gpu_write = true;
 916			list_move_tail(&obj->gpu_write_list,
 917				       &ring->gpu_write_list);
 918			intel_mark_busy(ring->dev, obj);
 919		}
 920
 921		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 
 
 
 922	}
 
 
 
 
 
 
 923}
 924
 925static void
 926i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 927				    struct drm_file *file,
 928				    struct intel_ring_buffer *ring)
 929{
 930	struct drm_i915_gem_request *request;
 931	u32 invalidate;
 
 932
 933	/*
 934	 * Ensure that the commands in the batch buffer are
 935	 * finished before the interrupt fires.
 936	 *
 937	 * The sampler always gets flushed on i965 (sigh).
 938	 */
 939	invalidate = I915_GEM_DOMAIN_COMMAND;
 940	if (INTEL_INFO(dev)->gen >= 4)
 941		invalidate |= I915_GEM_DOMAIN_SAMPLER;
 942	if (ring->flush(ring, invalidate, 0)) {
 943		i915_gem_next_request_seqno(ring);
 944		return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 945	}
 946
 947	/* Add a breadcrumb for the completion of the batch buffer */
 948	request = kzalloc(sizeof(*request), GFP_KERNEL);
 949	if (request == NULL || i915_add_request(ring, file, request)) {
 950		i915_gem_next_request_seqno(ring);
 951		kfree(request);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 952	}
 953}
 954
 955static int
 956i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 957		       struct drm_file *file,
 958		       struct drm_i915_gem_execbuffer2 *args,
 959		       struct drm_i915_gem_exec_object2 *exec)
 
 960{
 961	drm_i915_private_t *dev_priv = dev->dev_private;
 962	struct list_head objects;
 963	struct eb_objects *eb;
 964	struct drm_i915_gem_object *batch_obj;
 965	struct drm_clip_rect *cliprects = NULL;
 966	struct intel_ring_buffer *ring;
 967	u32 exec_start, exec_len;
 968	u32 seqno;
 969	int ret, mode, i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 970
 971	if (!i915_gem_check_execbuffer(args)) {
 972		DRM_ERROR("execbuf with invalid offset/length\n");
 973		return -EINVAL;
 974	}
 
 
 
 
 
 
 975
 976	ret = validate_exec_list(exec, args->buffer_count);
 977	if (ret)
 978		return ret;
 979
 980	switch (args->flags & I915_EXEC_RING_MASK) {
 981	case I915_EXEC_DEFAULT:
 982	case I915_EXEC_RENDER:
 983		ring = &dev_priv->ring[RCS];
 984		break;
 985	case I915_EXEC_BSD:
 986		if (!HAS_BSD(dev)) {
 987			DRM_ERROR("execbuf with invalid ring (BSD)\n");
 988			return -EINVAL;
 989		}
 990		ring = &dev_priv->ring[VCS];
 991		break;
 992	case I915_EXEC_BLT:
 993		if (!HAS_BLT(dev)) {
 994			DRM_ERROR("execbuf with invalid ring (BLT)\n");
 995			return -EINVAL;
 996		}
 997		ring = &dev_priv->ring[BCS];
 998		break;
 999	default:
1000		DRM_ERROR("execbuf with unknown ring: %d\n",
1001			  (int)(args->flags & I915_EXEC_RING_MASK));
1002		return -EINVAL;
1003	}
1004
1005	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1006	switch (mode) {
1007	case I915_EXEC_CONSTANTS_REL_GENERAL:
1008	case I915_EXEC_CONSTANTS_ABSOLUTE:
1009	case I915_EXEC_CONSTANTS_REL_SURFACE:
1010		if (ring == &dev_priv->ring[RCS] &&
1011		    mode != dev_priv->relative_constants_mode) {
1012			if (INTEL_INFO(dev)->gen < 4)
1013				return -EINVAL;
1014
1015			if (INTEL_INFO(dev)->gen > 5 &&
1016			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
1017				return -EINVAL;
1018
1019			ret = intel_ring_begin(ring, 4);
1020			if (ret)
1021				return ret;
1022
1023			intel_ring_emit(ring, MI_NOOP);
1024			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1025			intel_ring_emit(ring, INSTPM);
1026			intel_ring_emit(ring,
1027					I915_EXEC_CONSTANTS_MASK << 16 | mode);
1028			intel_ring_advance(ring);
1029
1030			dev_priv->relative_constants_mode = mode;
1031		}
1032		break;
1033	default:
1034		DRM_ERROR("execbuf with unknown constants: %d\n", mode);
1035		return -EINVAL;
1036	}
1037
1038	if (args->buffer_count < 1) {
1039		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1040		return -EINVAL;
1041	}
1042
1043	if (args->num_cliprects != 0) {
1044		if (ring != &dev_priv->ring[RCS]) {
1045			DRM_ERROR("clip rectangles are only valid with the render ring\n");
1046			return -EINVAL;
1047		}
1048
1049		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
1050				    GFP_KERNEL);
1051		if (cliprects == NULL) {
1052			ret = -ENOMEM;
1053			goto pre_mutex_err;
1054		}
1055
1056		if (copy_from_user(cliprects,
1057				     (struct drm_clip_rect __user *)(uintptr_t)
1058				     args->cliprects_ptr,
1059				     sizeof(*cliprects)*args->num_cliprects)) {
1060			ret = -EFAULT;
1061			goto pre_mutex_err;
1062		}
1063	}
1064
1065	ret = i915_mutex_lock_interruptible(dev);
1066	if (ret)
1067		goto pre_mutex_err;
 
 
 
 
 
 
1068
1069	if (dev_priv->mm.suspended) {
1070		mutex_unlock(&dev->struct_mutex);
1071		ret = -EBUSY;
1072		goto pre_mutex_err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1073	}
1074
1075	eb = eb_create(args->buffer_count);
1076	if (eb == NULL) {
1077		mutex_unlock(&dev->struct_mutex);
1078		ret = -ENOMEM;
1079		goto pre_mutex_err;
 
 
 
 
 
1080	}
1081
1082	/* Look up object handles */
1083	INIT_LIST_HEAD(&objects);
1084	for (i = 0; i < args->buffer_count; i++) {
1085		struct drm_i915_gem_object *obj;
1086
1087		obj = to_intel_bo(drm_gem_object_lookup(dev, file,
1088							exec[i].handle));
1089		if (&obj->base == NULL) {
1090			DRM_ERROR("Invalid object handle %d at index %d\n",
1091				   exec[i].handle, i);
1092			/* prevent error path from reading uninitialized data */
1093			ret = -ENOENT;
1094			goto err;
1095		}
1096
1097		if (!list_empty(&obj->exec_list)) {
1098			DRM_ERROR("Object %p [handle %d, index %d] appears more than once in object list\n",
1099				   obj, exec[i].handle, i);
1100			ret = -EINVAL;
1101			goto err;
 
 
 
 
 
 
 
 
1102		}
1103
1104		list_add_tail(&obj->exec_list, &objects);
1105		obj->exec_handle = exec[i].handle;
1106		obj->exec_entry = &exec[i];
1107		eb_add_object(eb, obj);
1108	}
1109
1110	/* take note of the batch buffer before we might reorder the lists */
1111	batch_obj = list_entry(objects.prev,
1112			       struct drm_i915_gem_object,
1113			       exec_list);
1114
1115	/* Move the objects en-masse into the GTT, evicting if necessary. */
1116	ret = i915_gem_execbuffer_reserve(ring, file, &objects);
1117	if (ret)
1118		goto err;
1119
1120	/* The objects are in their final locations, apply the relocations. */
1121	ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
1122	if (ret) {
1123		if (ret == -EFAULT) {
1124			ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
1125								&objects, eb,
1126								exec,
1127								args->buffer_count);
1128			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 
 
 
 
 
 
 
 
 
 
 
 
1129		}
1130		if (ret)
1131			goto err;
1132	}
1133
1134	/* Set the pending read domains for the batch buffer to COMMAND */
1135	if (batch_obj->base.pending_write_domain) {
1136		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
1137		ret = -EINVAL;
1138		goto err;
1139	}
1140	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1141
1142	ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
1143	if (ret)
1144		goto err;
1145
1146	seqno = i915_gem_next_request_seqno(ring);
1147	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
1148		if (seqno < ring->sync_seqno[i]) {
1149			/* The GPU can not handle its semaphore value wrapping,
1150			 * so every billion or so execbuffers, we need to stall
1151			 * the GPU in order to reset the counters.
1152			 */
1153			ret = i915_gpu_idle(dev);
1154			if (ret)
1155				goto err;
1156
1157			BUG_ON(ring->sync_seqno[i]);
1158		}
 
 
 
1159	}
1160
1161	trace_i915_gem_ring_dispatch(ring, seqno);
 
 
 
 
1162
1163	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1164	exec_len = args->batch_len;
1165	if (cliprects) {
1166		for (i = 0; i < args->num_cliprects; i++) {
1167			ret = i915_emit_box(dev, &cliprects[i],
1168					    args->DR1, args->DR4);
1169			if (ret)
1170				goto err;
1171
1172			ret = ring->dispatch_execbuffer(ring,
1173							exec_start, exec_len);
1174			if (ret)
1175				goto err;
 
1176		}
1177	} else {
1178		ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1179		if (ret)
1180			goto err;
1181	}
1182
1183	i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1184	i915_gem_execbuffer_retire_commands(dev, file, ring);
 
 
 
 
 
 
1185
1186err:
1187	eb_destroy(eb);
1188	while (!list_empty(&objects)) {
1189		struct drm_i915_gem_object *obj;
1190
1191		obj = list_first_entry(&objects,
1192				       struct drm_i915_gem_object,
1193				       exec_list);
1194		list_del_init(&obj->exec_list);
1195		drm_gem_object_unreference(&obj->base);
 
 
 
 
 
 
 
 
1196	}
1197
 
 
 
 
 
 
1198	mutex_unlock(&dev->struct_mutex);
 
 
 
 
 
 
 
 
 
 
 
 
1199
1200pre_mutex_err:
1201	kfree(cliprects);
1202	return ret;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1203}
1204
1205/*
1206 * Legacy execbuffer just creates an exec2 list from the original exec object
1207 * list array and passes it to the real function.
1208 */
1209int
1210i915_gem_execbuffer(struct drm_device *dev, void *data,
1211		    struct drm_file *file)
1212{
1213	struct drm_i915_gem_execbuffer *args = data;
1214	struct drm_i915_gem_execbuffer2 exec2;
1215	struct drm_i915_gem_exec_object *exec_list = NULL;
1216	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1217	int ret, i;
 
 
1218
1219	if (args->buffer_count < 1) {
1220		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
1221		return -EINVAL;
1222	}
1223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224	/* Copy in the exec list from userland */
1225	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1226	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
 
 
1227	if (exec_list == NULL || exec2_list == NULL) {
1228		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1229			  args->buffer_count);
1230		drm_free_large(exec_list);
1231		drm_free_large(exec2_list);
1232		return -ENOMEM;
1233	}
1234	ret = copy_from_user(exec_list,
1235			     (struct drm_i915_relocation_entry __user *)
1236			     (uintptr_t) args->buffers_ptr,
1237			     sizeof(*exec_list) * args->buffer_count);
1238	if (ret != 0) {
1239		DRM_ERROR("copy %d exec entries failed %d\n",
1240			  args->buffer_count, ret);
1241		drm_free_large(exec_list);
1242		drm_free_large(exec2_list);
1243		return -EFAULT;
1244	}
1245
1246	for (i = 0; i < args->buffer_count; i++) {
1247		exec2_list[i].handle = exec_list[i].handle;
1248		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1249		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1250		exec2_list[i].alignment = exec_list[i].alignment;
1251		exec2_list[i].offset = exec_list[i].offset;
1252		if (INTEL_INFO(dev)->gen < 4)
1253			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1254		else
1255			exec2_list[i].flags = 0;
1256	}
1257
1258	exec2.buffers_ptr = args->buffers_ptr;
1259	exec2.buffer_count = args->buffer_count;
1260	exec2.batch_start_offset = args->batch_start_offset;
1261	exec2.batch_len = args->batch_len;
1262	exec2.DR1 = args->DR1;
1263	exec2.DR4 = args->DR4;
1264	exec2.num_cliprects = args->num_cliprects;
1265	exec2.cliprects_ptr = args->cliprects_ptr;
1266	exec2.flags = I915_EXEC_RENDER;
1267
1268	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1269	if (!ret) {
1270		/* Copy the new buffer offsets back to the user's exec list. */
1271		for (i = 0; i < args->buffer_count; i++)
1272			exec_list[i].offset = exec2_list[i].offset;
1273		/* ... and back out to userspace */
1274		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1275				   (uintptr_t) args->buffers_ptr,
1276				   exec_list,
1277				   sizeof(*exec_list) * args->buffer_count);
1278		if (ret) {
1279			ret = -EFAULT;
1280			DRM_ERROR("failed to copy %d exec entries "
1281				  "back to user (%d)\n",
1282				  args->buffer_count, ret);
1283		}
1284	}
1285
1286	drm_free_large(exec_list);
1287	drm_free_large(exec2_list);
1288	return ret;
1289}
1290
1291int
1292i915_gem_execbuffer2(struct drm_device *dev, void *data,
1293		     struct drm_file *file)
1294{
1295	struct drm_i915_gem_execbuffer2 *args = data;
1296	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1297	int ret;
 
 
1298
1299	if (args->buffer_count < 1) {
1300		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
1301		return -EINVAL;
1302	}
1303
1304	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1305			     GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1306	if (exec2_list == NULL)
1307		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1308					   args->buffer_count);
 
1309	if (exec2_list == NULL) {
1310		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
1311			  args->buffer_count);
1312		return -ENOMEM;
1313	}
1314	ret = copy_from_user(exec2_list,
1315			     (struct drm_i915_relocation_entry __user *)
1316			     (uintptr_t) args->buffers_ptr,
1317			     sizeof(*exec2_list) * args->buffer_count);
1318	if (ret != 0) {
1319		DRM_ERROR("copy %d exec entries failed %d\n",
1320			  args->buffer_count, ret);
1321		drm_free_large(exec2_list);
1322		return -EFAULT;
1323	}
1324
1325	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1326	if (!ret) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1327		/* Copy the new buffer offsets back to the user's exec list. */
1328		ret = copy_to_user((struct drm_i915_relocation_entry __user *)
1329				   (uintptr_t) args->buffers_ptr,
1330				   exec2_list,
1331				   sizeof(*exec2_list) * args->buffer_count);
1332		if (ret) {
1333			ret = -EFAULT;
1334			DRM_ERROR("failed to copy %d exec entries "
1335				  "back to user (%d)\n",
1336				  args->buffer_count, ret);
 
1337		}
 
 
1338	}
1339
1340	drm_free_large(exec2_list);
1341	return ret;
 
 
1342}

   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <linux/dma_remapping.h>
  30#include <linux/reservation.h>
  31#include <linux/sync_file.h>
  32#include <linux/uaccess.h>
  33
  34#include <drm/drmP.h>
  35#include <drm/drm_syncobj.h>
  36#include <drm/i915_drm.h>
  37
  38#include "i915_drv.h"
  39#include "i915_gem_clflush.h"
  40#include "i915_trace.h"
  41#include "intel_drv.h"
  42#include "intel_frontbuffer.h"
  43
    /* Relocation methods that DBG_FORCE_RELOC may be set to (0 forces none). */
  44enum {
  45	FORCE_CPU_RELOC = 1,
  46	FORCE_GTT_RELOC,
  47	FORCE_GPU_RELOC,
  48#define DBG_FORCE_RELOC 0 /* choose one of the above! */
  49};
  50
    /*
     * Internal per-object flags, kept in the top bits of the execobj flags.
     * HAS_PIN is set by eb_pin_vma() once i915_vma_pin() has succeeded and
     * HAS_FENCE when the vma also holds a fence; NEEDS_MAP and NEEDS_BIAS are
     * placement constraints checked by eb_vma_misplaced().
     */
  51#define __EXEC_OBJECT_HAS_REF		BIT(31)
  52#define __EXEC_OBJECT_HAS_PIN		BIT(30)
  53#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
  54#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
  55#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
  56#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
    /* The bits cleared again by eb_unreserve_vma(). */
  57#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  58
    /*
     * Internal flags kept in args->flags. When __EXEC_VALIDATED is set,
     * eb_add_vma() skips eb_validate_vma(). UPDATE is OR'ed into entry->offset
     * by eb_add_vma() when the vma is not at the offset userspace supplied.
     */
  59#define __EXEC_HAS_RELOC	BIT(31)
  60#define __EXEC_VALIDATED	BIT(30)
  61#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
  62#define UPDATE			PIN_OFFSET_FIXED
  63
    /* Lowest node.start accepted for a vma flagged __EXEC_OBJECT_NEEDS_BIAS. */
  64#define BATCH_OFFSET_BIAS (256*1024)
  65
  66#define __I915_EXEC_ILLEGAL_FLAGS \
  67	(__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK)
  68
  69/**
  70 * DOC: User command execution
  71 *
  72 * Userspace submits commands to be executed on the GPU as an instruction
  73 * stream within a GEM object we call a batchbuffer. These instructions may
  74 * refer to other GEM objects containing auxiliary state such as kernels,
  75 * samplers, render targets and even secondary batchbuffers. Userspace does
  76 * not know where in the GPU memory these objects reside and so before the
  77 * batchbuffer is passed to the GPU for execution, those addresses in the
  78 * batchbuffer and auxiliary objects are updated. This is known as relocation,
  79 * or patching. To try and avoid having to relocate each object on the next
  80 * execution, userspace is told the location of those objects in this pass,
  81 * but this remains just a hint as the kernel may choose a new location for
  82 * any object in the future.
  83 *
  84 * Processing an execbuf ioctl is conceptually split up into a few phases.
  85 *
  86 * 1. Validation - Ensure all the pointers, handles and flags are valid.
  87 * 2. Reservation - Assign GPU address space for every object
  88 * 3. Relocation - Update any addresses to point to the final locations
  89 * 4. Serialisation - Order the request with respect to its dependencies
  90 * 5. Construction - Construct a request to execute the batchbuffer
  91 * 6. Submission (at some point in the future execution)
  92 *
  93 * Reserving resources for the execbuf is the most complicated phase. We
  94 * neither want to have to migrate the object in the address space, nor do
  95 * we want to have to update any relocations pointing to this object. Ideally,
  96 * we want to leave the object where it is and for all the existing relocations
  97 * to match. If the object is given a new address, or if userspace thinks the
  98 * object is elsewhere, we have to parse all the relocation entries and update
  99 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 100 * all the target addresses in all of its objects match the value in the
 101 * relocation entries and that they all match the presumed offsets given by the
 102 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 103 * moved any buffers, all the relocation entries are valid and we can skip
 104 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 105 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 106 *
 107 *      The addresses written in the objects must match the corresponding
 108 *      reloc.presumed_offset which in turn must match the corresponding
 109 *      execobject.offset.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 110 *
 111 *      Any render targets written to in the batch must be flagged with
 112 *      EXEC_OBJECT_WRITE.
 113 *
 114 *      To avoid stalling, execobject.offset should match the current
 115 *      address of that object within the active context.
 
 
 
 
 116 *
 117 * The reservation is done in multiple phases. First we try and keep any
 118 * object already bound in its current location - so long as it meets the
 119 * constraints imposed by the new execbuffer. Any object left unbound after the
 120 * first pass is then fitted into any available idle space. If an object does
 121 * not fit, all objects are removed from the reservation and the process rerun
 122 * after sorting the objects into a priority order (more difficult to fit
 123 * objects are tried first). Failing that, the entire VM is cleared and we try
 124 * to fit the execbuf one last time before concluding that it simply will not
 125 * fit.
 
 
 
 
 
 
 
 
 
 
 
 
 
 126 *
 127 * A small complication to all of this is that we allow userspace not only to
 128 * specify an alignment and a size for the object in the address space, but
 129 * we also allow userspace to specify the exact offset. These objects are
 130 * simpler to place (the location is known a priori) all we have to do is make
 131 * sure the space is available.
 132 *
 133 * Once all the objects are in place, patching up the buried pointers to point
 134 * to the final locations is a fairly simple job of walking over the relocation
 135 * entry arrays, looking up the right address and rewriting the value into
 136 * the object. Simple! ... The relocation entries are stored in user memory
 137 * and so to access them we have to copy them into a local buffer. That copy
 138 * has to avoid taking any pagefaults as they may lead back to a GEM object
 139 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 140 * the relocation into multiple passes. First we try to do everything within an
 141 * atomic context (avoid the pagefaults) which requires that we never wait. If
 142 * we detect that we may wait, or if we need to fault, then we have to fallback
 143 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 144 * bells yet?) Dropping the mutex means that we lose all the state we have
 145 * built up so far for the execbuf and we must reset any global data. However,
 146 * we do leave the objects pinned in their final locations - which is a
 147 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 148 * allocate and copy all the relocation entries into a large array at our
 149 * leisure, reacquire the mutex, reclaim all the objects and other state and
 150 * then proceed to update any incorrect addresses with the objects.
 151 *
 152 * As we process the relocation entries, we maintain a record of whether the
 153 * object is being written to. Using NORELOC, we expect userspace to provide
 154 * this information instead. We also check whether we can skip the relocation
 155 * by comparing the expected value inside the relocation entry with the target's
 156 * final address. If they differ, we have to map the current object and rewrite
 157 * the 4 or 8 byte pointer within.
 158 *
 159 * Serialising an execbuf is quite simple according to the rules of the GEM
 160 * ABI. Execution within each context is ordered by the order of submission.
 161 * Writes to any GEM object are in order of submission and are exclusive. Reads
 162 * from a GEM object are unordered with respect to other reads, but ordered by
 163 * writes. A write submitted after a read cannot occur before the read, and
 164 * similarly any read submitted after a write cannot occur before the write.
 165 * Writes are ordered between engines such that only one write occurs at any
 166 * time (completing any reads beforehand) - using semaphores where available
 167 * and CPU serialisation otherwise. Other GEM accesses obey the same rules, any
 168 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 169 * reads before starting, and any read (either using set-domain or pread) must
 170 * flush all GPU writes before starting. (Note we only employ a barrier before,
 171 * we currently rely on userspace not concurrently starting a new execution
 172 * whilst reading or writing to an object. This may be an advantage or not
 173 * depending on how much you trust userspace not to shoot themselves in the
 174 * foot.) Serialisation may just result in the request being inserted into
 175 * a DAG awaiting its turn, but the simplest is to wait on the CPU until
 176 * all dependencies are resolved.
 177 *
 178 * After all of that, it is just a matter of closing the request and handing it to
 179 * the hardware (well, leaving it in a queue to be executed). However, we also
 180 * offer the ability for batchbuffers to be run with elevated privileges so
 181 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
 182 * Before any batch is given extra privileges we first must check that it
 183 * contains no nefarious instructions, we check that each instruction is from
 184 * our whitelist and all registers are also from an allowed list. We first
 185 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 186 * access to it, either by the CPU or GPU as we scan it) and then parse each
 187 * instruction. If everything is ok, we set a flag telling the hardware to run
 188 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 189 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 190
    /*
     * Working state for a single execbuf call: the ioctl arguments, the
     * per-execobj vma/flags arrays, and the bookkeeping used while reserving
     * and relocating the objects.
     */
 191struct i915_execbuffer {
 192	struct drm_i915_private *i915; /** i915 backpointer */
 193	struct drm_file *file; /** per-file lookup tables and limits */
 194	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 195	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
 196	struct i915_vma **vma; /** vma of each execobj, same index as exec[] */
 197	unsigned int *flags; /** flags of each execobj, same index as exec[]; vma->exec_flags points in here */
 198
 199	struct intel_engine_cs *engine; /** engine to queue the request to */
 200	struct i915_gem_context *ctx; /** context for building the request */
 201	struct i915_address_space *vm; /** GTT and vma for the request */
 202
 203	struct i915_request *request; /** our request to build */
 204	struct i915_vma *batch; /** identity of the batch obj/vma */
 205
 206	/** actual size of execobj[] as we may extend it for the cmdparser */
 207	unsigned int buffer_count;
 208
 209	/** list of vma not yet bound during reservation phase */
 210	struct list_head unbound;
 211
 212	/** list of vma that have execobj.relocation_count */
 213	struct list_head relocs;
 214
 215	/**
 216	 * Track the most recently used object for relocations, as we
 217	 * frequently have to perform multiple relocations within the same
 218	 * obj/page
 219	 */
 220	struct reloc_cache {
 221		struct drm_mm_node node; /** temporary GTT binding */
 222		unsigned long vaddr; /** Current kmap address */
 223		unsigned long page; /** Currently mapped page index */
 224		unsigned int gen; /** Cached value of INTEL_GEN */
 225		bool use_64bit_reloc : 1;
 226		bool has_llc : 1;
 227		bool has_fence : 1;
 228		bool needs_unfenced : 1;
 229
 230		struct i915_request *rq;
 231		u32 *rq_cmd;
 232		unsigned int rq_size;
 233	} reloc_cache;
 234
 235	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 236	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 237
 238	u32 batch_start_offset; /** Location within object of batch */
 239	u32 batch_len; /** Length of batch within object */
 240	u32 batch_flags; /** Flags composed for emit_bb_start() */
 241
 242	/**
 243	 * Indicate either the size of the hashtable used to resolve
 244	 * relocation handles, or if negative that we are using a direct
 245	 * index into the execobj[].
 246	 */
 247	int lut_size;
 248	struct hlist_head *buckets; /** ht for relocation handles */
 
 
 
 
 
 
 
 
 
 
 
 
 249};
 250
    /*
     * Map a vma back to its execobj: vma->exec_flags points into eb->flags[],
     * so the pointer difference is the index into eb->exec[].
     */
 251#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 
 
 
 
 
 
 
 
 
 
 
 252
 253/*
 254 * Used to convert any address to canonical form.
 255 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 256 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 257 * addresses to be in a canonical form:
 258 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 259 * canonical form [63:48] == [47]."
 260 */
 261#define GEN8_HIGH_ADDRESS_BIT 47
 262static inline u64 gen8_canonical_addr(u64 address)
 263{
 264	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
 265}
 266
 267static inline u64 gen8_noncanonical_addr(u64 address)
 
 268{
 269	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
 270}
 271
 272static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 
 273{
 274	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
 
 275}
 276
 277static int eb_create(struct i915_execbuffer *eb)
 
 278{
 279	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
 280		unsigned int size = 1 + ilog2(eb->buffer_count);
 281
 282		/*
 283		 * Without a 1:1 association between relocation handles and
 284		 * the execobject[] index, we instead create a hashtable.
 285		 * We size it dynamically based on available memory, starting
 286		 * first with 1:1 assocative hash and scaling back until
 287		 * the allocation succeeds.
 288		 *
 289		 * Later on we use a positive lut_size to indicate we are
 290		 * using this hashtable, and a negative value to indicate a
 291		 * direct lookup.
 292		 */
 293		do {
 294			gfp_t flags;
 295
 296			/* While we can still reduce the allocation size, don't
 297			 * raise a warning and allow the allocation to fail.
 298			 * On the last pass though, we want to try as hard
 299			 * as possible to perform the allocation and warn
 300			 * if it fails.
 301			 */
 302			flags = GFP_KERNEL;
 303			if (size > 1)
 304				flags |= __GFP_NORETRY | __GFP_NOWARN;
 305
 306			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
 307					      flags);
 308			if (eb->buckets)
 309				break;
 310		} while (--size);
 311
 312		if (unlikely(!size))
 313			return -ENOMEM;
 314
 315		eb->lut_size = size;
 316	} else {
 317		eb->lut_size = -eb->buffer_count;
 318	}
 319
 320	return 0;
 321}
 322
 323static bool
 324eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 325		 const struct i915_vma *vma,
 326		 unsigned int flags)
 327{
 328	if (vma->node.size < entry->pad_to_size)
 329		return true;
 330
 331	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 332		return true;
 333
 334	if (flags & EXEC_OBJECT_PINNED &&
 335	    vma->node.start != entry->offset)
 336		return true;
 337
 338	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 339	    vma->node.start < BATCH_OFFSET_BIAS)
 340		return true;
 341
 342	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 343	    (vma->node.start + vma->node.size - 1) >> 32)
 344		return true;
 345
 346	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
 347	    !i915_vma_is_map_and_fenceable(vma))
 348		return true;
 349
 350	return false;
 351}
 352
 353static inline bool
 354eb_pin_vma(struct i915_execbuffer *eb,
 355	   const struct drm_i915_gem_exec_object2 *entry,
 356	   struct i915_vma *vma)
 357{
 358	unsigned int exec_flags = *vma->exec_flags;
 359	u64 pin_flags;
 
 
 360
 361	if (vma->node.size)
 362		pin_flags = vma->node.start;
 363	else
 364		pin_flags = entry->offset & PIN_OFFSET_MASK;
 365
 366	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
 367	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
 368		pin_flags |= PIN_GLOBAL;
 369
 370	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
 371		return false;
 
 
 
 
 
 
 372
 373	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 374		if (unlikely(i915_vma_pin_fence(vma))) {
 375			i915_vma_unpin(vma);
 376			return false;
 377		}
 378
 379		if (vma->fence)
 380			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 381	}
 382
 383	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 384	return !eb_vma_misplaced(entry, vma, exec_flags);
 385}
 386
 387static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 388{
 389	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 390
 391	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 392		__i915_vma_unpin_fence(vma);
 393
 394	__i915_vma_unpin(vma);
 395}
 396
 397static inline void
 398eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 399{
 400	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 401		return;
 402
 403	__eb_unreserve_vma(vma, *flags);
 404	*flags &= ~__EXEC_OBJECT_RESERVED;
 405}
 406
 407static int
 408eb_validate_vma(struct i915_execbuffer *eb,
 409		struct drm_i915_gem_exec_object2 *entry,
 410		struct i915_vma *vma)
 411{
 412	if (unlikely(entry->flags & eb->invalid_flags))
 413		return -EINVAL;
 414
 415	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
 416		return -EINVAL;
 417
 418	/*
 419	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
 420	 * any non-page-aligned or non-canonical addresses.
 421	 */
 422	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
 423		     entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
 424		return -EINVAL;
 425
 426	/* pad_to_size was once a reserved field, so sanitize it */
 427	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
 428		if (unlikely(offset_in_page(entry->pad_to_size)))
 429			return -EINVAL;
 430	} else {
 431		entry->pad_to_size = 0;
 
 
 432	}
 433
 434	if (unlikely(vma->exec_flags)) {
 435		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 436			  entry->handle, (int)(entry - eb->exec));
 437		return -EINVAL;
 
 438	}
 439
 440	/*
 441	 * From drm_mm perspective address space is continuous,
 442	 * so from this point we're always using non-canonical
 443	 * form internally.
 444	 */
 445	entry->offset = gen8_noncanonical_addr(entry->offset);
 446
 447	if (!eb->reloc_cache.has_fence) {
 448		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 
 449	} else {
 450		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
 451		     eb->reloc_cache.needs_unfenced) &&
 452		    i915_gem_object_is_tiled(vma->obj))
 453			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 454	}
 455
 456	if (!(entry->flags & EXEC_OBJECT_PINNED))
 457		entry->flags |= eb->context_flags;
 458
 459	return 0;
 460}
 461
 462static int
 463eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 
 464{
 465	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 466	int err;
 
 467
 468	GEM_BUG_ON(i915_vma_is_closed(vma));
 
 
 469
 470	if (!(eb->args->flags & __EXEC_VALIDATED)) {
 471		err = eb_validate_vma(eb, entry, vma);
 472		if (unlikely(err))
 473			return err;
 
 
 
 
 
 
 
 
 
 474	}
 475
 476	if (eb->lut_size > 0) {
 477		vma->exec_handle = entry->handle;
 478		hlist_add_head(&vma->exec_node,
 479			       &eb->buckets[hash_32(entry->handle,
 480						    eb->lut_size)]);
 481	}
 482
 483	if (entry->relocation_count)
 484		list_add_tail(&vma->reloc_link, &eb->relocs);
 485
 486	/*
 487	 * Stash a pointer from the vma to execobj, so we can query its flags,
 488	 * size, alignment etc as provided by the user. Also we stash a pointer
 489	 * to the vma inside the execobj so that we can use a direct lookup
 490	 * to find the right target VMA when doing relocations.
 491	 */
 492	eb->vma[i] = vma;
 493	eb->flags[i] = entry->flags;
 494	vma->exec_flags = &eb->flags[i];
 495
 496	err = 0;
 497	if (eb_pin_vma(eb, entry, vma)) {
 498		if (entry->offset != vma->node.start) {
 499			entry->offset = vma->node.start | UPDATE;
 500			eb->args->flags |= __EXEC_HAS_RELOC;
 501		}
 502	} else {
 503		eb_unreserve_vma(vma, vma->exec_flags);
 504
 505		list_add_tail(&vma->exec_link, &eb->unbound);
 506		if (drm_mm_node_allocated(&vma->node))
 507			err = i915_vma_unbind(vma);
 508		if (unlikely(err))
 509			vma->exec_flags = NULL;
 510	}
 511	return err;
 512}
 513
 514static inline int use_cpu_reloc(const struct reloc_cache *cache,
 515				const struct drm_i915_gem_object *obj)
 
 
 516{
 517	if (!i915_gem_object_has_struct_page(obj))
 518		return false;
 519
 520	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
 521		return true;
 
 
 
 522
 523	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
 524		return false;
 525
 526	return (cache->has_llc ||
 527		obj->cache_dirty ||
 528		obj->cache_level != I915_CACHE_NONE);
 529}
 530
 531static int eb_reserve_vma(const struct i915_execbuffer *eb,
 532			  struct i915_vma *vma)
 
 
 533{
 534	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 535	unsigned int exec_flags = *vma->exec_flags;
 536	u64 pin_flags;
 537	int err;
 538
 539	pin_flags = PIN_USER | PIN_NONBLOCK;
 540	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
 541		pin_flags |= PIN_GLOBAL;
 542
 543	/*
 544	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 545	 * limit address to the first 4GBs for unflagged objects.
 546	 */
 547	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 548		pin_flags |= PIN_ZONE_4G;
 549
 550	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
 551		pin_flags |= PIN_MAPPABLE;
 552
 553	if (exec_flags & EXEC_OBJECT_PINNED) {
 554		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
 555		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
 556	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
 557		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 558	}
 559
 560	err = i915_vma_pin(vma,
 561			   entry->pad_to_size, entry->alignment,
 562			   pin_flags);
 563	if (err)
 564		return err;
 565
 566	if (entry->offset != vma->node.start) {
 567		entry->offset = vma->node.start | UPDATE;
 568		eb->args->flags |= __EXEC_HAS_RELOC;
 569	}
 570
 571	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 572		err = i915_vma_pin_fence(vma);
 573		if (unlikely(err)) {
 574			i915_vma_unpin(vma);
 575			return err;
 576		}
 577
 578		if (vma->fence)
 579			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 580	}
 
 581
 582	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 583	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 584
 585	return 0;
 586}
 587
 588static int eb_reserve(struct i915_execbuffer *eb)
 
 
 
 589{
 590	const unsigned int count = eb->buffer_count;
 591	struct list_head last;
 592	struct i915_vma *vma;
 593	unsigned int i, pass;
 594	int err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 595
 596	/*
 597	 * Attempt to pin all of the buffers into the GTT.
 598	 * This is done in 3 phases:
 599	 *
 600	 * 1a. Unbind all objects that do not match the GTT constraints for
 601	 *     the execbuffer (fenceable, mappable, alignment etc).
 602	 * 1b. Increment pin count for already bound objects.
 603	 * 2.  Bind new objects.
 604	 * 3.  Decrement pin count.
 605	 *
 606	 * This avoid unnecessary unbinding of later objects in order to make
 607	 * room for the earlier objects *unless* we need to defragment.
 608	 */
 609
 610	pass = 0;
 611	err = 0;
 612	do {
 613		list_for_each_entry(vma, &eb->unbound, exec_link) {
 614			err = eb_reserve_vma(eb, vma);
 615			if (err)
 616				break;
 617		}
 618		if (err != -ENOSPC)
 619			return err;
 620
 621		/* Resort *all* the objects into priority order */
 622		INIT_LIST_HEAD(&eb->unbound);
 623		INIT_LIST_HEAD(&last);
 624		for (i = 0; i < count; i++) {
 625			unsigned int flags = eb->flags[i];
 626			struct i915_vma *vma = eb->vma[i];
 627
 628			if (flags & EXEC_OBJECT_PINNED &&
 629			    flags & __EXEC_OBJECT_HAS_PIN)
 
 
 
 630				continue;
 631
 632			eb_unreserve_vma(vma, &eb->flags[i]);
 633
 634			if (flags & EXEC_OBJECT_PINNED)
 635				list_add(&vma->exec_link, &eb->unbound);
 636			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 637				list_add_tail(&vma->exec_link, &eb->unbound);
 
 
 
 
 638			else
 639				list_add_tail(&vma->exec_link, &last);
 640		}
 641		list_splice_tail(&last, &eb->unbound);
 
 
 642
 643		switch (pass++) {
 644		case 0:
 645			break;
 646
 647		case 1:
 648			/* Too fragmented, unbind everything and retry */
 649			err = i915_gem_evict_vm(eb->vm);
 650			if (err)
 651				return err;
 652			break;
 653
 654		default:
 655			return -ENOSPC;
 656		}
 657	} while (1);
 658}
 659
 660static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 661{
 662	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
 663		return 0;
 664	else
 665		return eb->buffer_count - 1;
 666}
 667
 668static int eb_select_context(struct i915_execbuffer *eb)
 669{
 670	struct i915_gem_context *ctx;
 671
 672	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
 673	if (unlikely(!ctx))
 674		return -ENOENT;
 675
 676	eb->ctx = ctx;
 677	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
 678
 679	eb->context_flags = 0;
 680	if (ctx->flags & CONTEXT_NO_ZEROMAP)
 681		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
 682
 683	return 0;
 684}
 685
 686static int eb_lookup_vmas(struct i915_execbuffer *eb)
 687{
 688	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 689	struct drm_i915_gem_object *obj;
 690	unsigned int i;
 691	int err;
 692
 693	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
 694		return -ENOENT;
 695
 696	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
 697		return -EIO;
 698
 699	INIT_LIST_HEAD(&eb->relocs);
 700	INIT_LIST_HEAD(&eb->unbound);
 701
 702	for (i = 0; i < eb->buffer_count; i++) {
 703		u32 handle = eb->exec[i].handle;
 704		struct i915_lut_handle *lut;
 705		struct i915_vma *vma;
 706
 707		vma = radix_tree_lookup(handles_vma, handle);
 708		if (likely(vma))
 709			goto add_vma;
 710
 711		obj = i915_gem_object_lookup(eb->file, handle);
 712		if (unlikely(!obj)) {
 713			err = -ENOENT;
 714			goto err_vma;
 715		}
 716
 717		vma = i915_vma_instance(obj, eb->vm, NULL);
 718		if (unlikely(IS_ERR(vma))) {
 719			err = PTR_ERR(vma);
 720			goto err_obj;
 721		}
 722
 723		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
 724		if (unlikely(!lut)) {
 725			err = -ENOMEM;
 726			goto err_obj;
 727		}
 728
 729		err = radix_tree_insert(handles_vma, handle, vma);
 730		if (unlikely(err)) {
 731			kmem_cache_free(eb->i915->luts, lut);
 732			goto err_obj;
 733		}
 734
 735		/* transfer ref to ctx */
 736		vma->open_count++;
 737		list_add(&lut->obj_link, &obj->lut_list);
 738		list_add(&lut->ctx_link, &eb->ctx->handles_list);
 739		lut->ctx = eb->ctx;
 740		lut->handle = handle;
 741
 742add_vma:
 743		err = eb_add_vma(eb, i, vma);
 744		if (unlikely(err))
 745			goto err_vma;
 746
 747		GEM_BUG_ON(vma != eb->vma[i]);
 748		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 749	}
 750
 751	/* take note of the batch buffer before we might reorder the lists */
 752	i = eb_batch_index(eb);
 753	eb->batch = eb->vma[i];
 754	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 755
 756	/*
 757	 * SNA is doing fancy tricks with compressing batch buffers, which leads
 758	 * to negative relocation deltas. Usually that works out ok since the
 759	 * relocate address is still positive, except when the batch is placed
 760	 * very low in the GTT. Ensure this doesn't happen.
 761	 *
 762	 * Note that actual hangs have only been observed on gen7, but for
 763	 * paranoia do it everywhere.
 764	 */
 765	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
 766		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 767	if (eb->reloc_cache.has_fence)
 768		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 769
 770	eb->args->flags |= __EXEC_VALIDATED;
 771	return eb_reserve(eb);
 772
 773err_obj:
 774	i915_gem_object_put(obj);
 775err_vma:
 776	eb->vma[i] = NULL;
 777	return err;
 778}
 779
 780static struct i915_vma *
 781eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 782{
 783	if (eb->lut_size < 0) {
 784		if (handle >= -eb->lut_size)
 785			return NULL;
 786		return eb->vma[handle];
 787	} else {
 788		struct hlist_head *head;
 789		struct i915_vma *vma;
 790
 791		head = &eb->buckets[hash_32(handle, eb->lut_size)];
 792		hlist_for_each_entry(vma, head, exec_node) {
 793			if (vma->exec_handle == handle)
 794				return vma;
 795		}
 796		return NULL;
 797	}
 798}
 799
 800static void eb_release_vmas(const struct i915_execbuffer *eb)
 801{
 802	const unsigned int count = eb->buffer_count;
 803	unsigned int i;
 804
 805	for (i = 0; i < count; i++) {
 806		struct i915_vma *vma = eb->vma[i];
 807		unsigned int flags = eb->flags[i];
 808
 809		if (!vma)
 810			break;
 811
 812		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 813		vma->exec_flags = NULL;
 814		eb->vma[i] = NULL;
 815
 816		if (flags & __EXEC_OBJECT_HAS_PIN)
 817			__eb_unreserve_vma(vma, flags);
 818
 819		if (flags & __EXEC_OBJECT_HAS_REF)
 820			i915_vma_put(vma);
 821	}
 822}
 823
 824static void eb_reset_vmas(const struct i915_execbuffer *eb)
 825{
 826	eb_release_vmas(eb);
 827	if (eb->lut_size > 0)
 828		memset(eb->buckets, 0,
 829		       sizeof(struct hlist_head) << eb->lut_size);
 830}
 831
 832static void eb_destroy(const struct i915_execbuffer *eb)
 833{
 834	GEM_BUG_ON(eb->reloc_cache.rq);
 835
 836	if (eb->lut_size > 0)
 837		kfree(eb->buckets);
 838}
 839
 840static inline u64
 841relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 842		  const struct i915_vma *target)
 843{
 844	return gen8_canonical_addr((int)reloc->delta + target->node.start);
 845}
 846
 847static void reloc_cache_init(struct reloc_cache *cache,
 848			     struct drm_i915_private *i915)
 849{
 850	cache->page = -1;
 851	cache->vaddr = 0;
 852	/* Must be a variable in the struct to allow GCC to unroll. */
 853	cache->gen = INTEL_GEN(i915);
 854	cache->has_llc = HAS_LLC(i915);
 855	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 856	cache->has_fence = cache->gen < 4;
 857	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 858	cache->node.allocated = false;
 859	cache->rq = NULL;
 860	cache->rq_size = 0;
 861}
 862
 863static inline void *unmask_page(unsigned long p)
 864{
 865	return (void *)(uintptr_t)(p & PAGE_MASK);
 866}
 867
 868static inline unsigned int unmask_flags(unsigned long p)
 869{
 870	return p & ~PAGE_MASK;
 871}
 872
/*
 * Flag kept in the sub-page bits of reloc_cache.vaddr, next to the
 * CLFLUSH_* flags: set by reloc_kmap() and tested by reloc_cache_reset()
 * and reloc_vaddr() to tell a kmap'ed page from an iomap'ed one. It must
 * not overlap CLFLUSH_FLAGS (enforced by BUILD_BUG_ON in reloc_kmap()).
 */
#define KMAP 0x4 /* after CLFLUSH_FLAGS */
 874
 875static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 876{
 877	struct drm_i915_private *i915 =
 878		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
 879	return &i915->ggtt;
 880}
 881
 882static void reloc_gpu_flush(struct reloc_cache *cache)
 883{
 884	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
 885	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 886	i915_gem_object_unpin_map(cache->rq->batch->obj);
 887	i915_gem_chipset_flush(cache->rq->i915);
 888
 889	__i915_request_add(cache->rq, true);
 890	cache->rq = NULL;
 891}
 892
 893static void reloc_cache_reset(struct reloc_cache *cache)
 894{
 895	void *vaddr;
 896
 897	if (cache->rq)
 898		reloc_gpu_flush(cache);
 899
 900	if (!cache->vaddr)
 901		return;
 902
 903	vaddr = unmask_page(cache->vaddr);
 904	if (cache->vaddr & KMAP) {
 905		if (cache->vaddr & CLFLUSH_AFTER)
 906			mb();
 907
 908		kunmap_atomic(vaddr);
 909		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
 910	} else {
 911		wmb();
 912		io_mapping_unmap_atomic((void __iomem *)vaddr);
 913		if (cache->node.allocated) {
 914			struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 915
 916			ggtt->base.clear_range(&ggtt->base,
 917					       cache->node.start,
 918					       cache->node.size);
 919			drm_mm_remove_node(&cache->node);
 920		} else {
 921			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 922		}
 923	}
 924
 925	cache->vaddr = 0;
 926	cache->page = -1;
 927}
 928
 929static void *reloc_kmap(struct drm_i915_gem_object *obj,
 930			struct reloc_cache *cache,
 931			unsigned long page)
 932{
 933	void *vaddr;
 934
 935	if (cache->vaddr) {
 936		kunmap_atomic(unmask_page(cache->vaddr));
 937	} else {
 938		unsigned int flushes;
 939		int err;
 940
 941		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
 942		if (err)
 943			return ERR_PTR(err);
 944
 945		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
 946		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 947
 948		cache->vaddr = flushes | KMAP;
 949		cache->node.mm = (void *)obj;
 950		if (flushes)
 951			mb();
 952	}
 953
 954	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
 955	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 956	cache->page = page;
 957
 958	return vaddr;
 959}
 960
 961static void *reloc_iomap(struct drm_i915_gem_object *obj,
 962			 struct reloc_cache *cache,
 963			 unsigned long page)
 964{
 965	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 966	unsigned long offset;
 967	void *vaddr;
 968
 969	if (cache->vaddr) {
 970		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 971	} else {
 972		struct i915_vma *vma;
 973		int err;
 974
 975		if (use_cpu_reloc(cache, obj))
 976			return NULL;
 977
 978		err = i915_gem_object_set_to_gtt_domain(obj, true);
 979		if (err)
 980			return ERR_PTR(err);
 981
 982		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 983					       PIN_MAPPABLE |
 984					       PIN_NONBLOCK |
 985					       PIN_NONFAULT);
 986		if (IS_ERR(vma)) {
 987			memset(&cache->node, 0, sizeof(cache->node));
 988			err = drm_mm_insert_node_in_range
 989				(&ggtt->base.mm, &cache->node,
 990				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 991				 0, ggtt->mappable_end,
 992				 DRM_MM_INSERT_LOW);
 993			if (err) /* no inactive aperture space, use cpu reloc */
 994				return NULL;
 995		} else {
 996			err = i915_vma_put_fence(vma);
 997			if (err) {
 998				i915_vma_unpin(vma);
 999				return ERR_PTR(err);
1000			}
1001
1002			cache->node.start = vma->node.start;
1003			cache->node.mm = (void *)vma;
1004		}
1005	}
1006
1007	offset = cache->node.start;
1008	if (cache->node.allocated) {
1009		wmb();
1010		ggtt->base.insert_page(&ggtt->base,
1011				       i915_gem_object_get_dma_address(obj, page),
1012				       offset, I915_CACHE_NONE, 0);
1013	} else {
1014		offset += page << PAGE_SHIFT;
1015	}
1016
1017	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1018							 offset);
1019	cache->page = page;
1020	cache->vaddr = (unsigned long)vaddr;
1021
1022	return vaddr;
1023}
1024
1025static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1026			 struct reloc_cache *cache,
1027			 unsigned long page)
1028{
1029	void *vaddr;
1030
1031	if (cache->page == page) {
1032		vaddr = unmask_page(cache->vaddr);
1033	} else {
1034		vaddr = NULL;
1035		if ((cache->vaddr & KMAP) == 0)
1036			vaddr = reloc_iomap(obj, cache, page);
1037		if (!vaddr)
1038			vaddr = reloc_kmap(obj, cache, page);
1039	}
1040
1041	return vaddr;
1042}
1043
1044static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
1045{
1046	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1047		if (flushes & CLFLUSH_BEFORE) {
1048			clflushopt(addr);
1049			mb();
1050		}
1051
1052		*addr = value;
 
1053
1054		/*
1055		 * Writes to the same cacheline are serialised by the CPU
1056		 * (including clflush). On the write path, we only require
1057		 * that it hits memory in an orderly fashion and place
1058		 * mb barriers at the start and end of the relocation phase
1059		 * to ensure ordering of clflush wrt to the system.
1060		 */
1061		if (flushes & CLFLUSH_AFTER)
1062			clflushopt(addr);
1063	} else
1064		*addr = value;
1065}
1066
/*
 * Set up a request on eb->engine that will execute a PAGE_SIZE batch of
 * relocation commands targeting @vma, and record it in eb->reloc_cache
 * (rq, rq_cmd, rq_size) for reloc_gpu() to fill in.
 *
 * Returns 0 on success, leaving the CPU mapping of the batch pinned until
 * reloc_gpu_flush(); otherwise a negative error code with everything
 * acquired here released again.
 *
 * NOTE(review): @len is not used by this function.
 */
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
			     struct i915_vma *vma,
			     unsigned int len)
{
	struct reloc_cache *cache = &eb->reloc_cache;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cmd;
	int err;

	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);

	/* Borrow a page-sized object from the engine's batch pool. */
	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	/* Map it for CPU writes: write-back with LLC, write-combining without. */
	cmd = i915_gem_object_pin_map(obj,
				      cache->has_llc ?
				      I915_MAP_FORCE_WB :
				      I915_MAP_FORCE_WC);
	/* The pages are unpinned whether or not the mapping succeeded. */
	i915_gem_object_unpin_pages(obj);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	err = i915_gem_object_set_to_wc_domain(obj, false);
	if (err)
		goto err_unmap;

	/* Bind the batch into the same address space as the target vma. */
	batch = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_unmap;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
	if (err)
		goto err_unmap;

	rq = i915_request_alloc(eb->engine, eb->ctx);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	/* Order the request after existing users of the target object. */
	err = i915_request_await_object(rq, vma->obj, true);
	if (err)
		goto err_request;

	err = eb->engine->emit_bb_start(rq,
					batch->node.start, PAGE_SIZE,
					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
	if (err)
		goto err_request;

	/* Track the batch against the request, then drop our pin on it. */
	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
	i915_vma_move_to_active(batch, rq, 0);
	reservation_object_lock(batch->resv, NULL);
	reservation_object_add_excl_fence(batch->resv, &rq->fence);
	reservation_object_unlock(batch->resv);
	i915_vma_unpin(batch);

	/* The target is written by the request: track it as a write. */
	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	reservation_object_lock(vma->resv, NULL);
	reservation_object_add_excl_fence(vma->resv, &rq->fence);
	reservation_object_unlock(vma->resv);

	rq->batch = batch;

	cache->rq = rq;
	cache->rq_cmd = cmd;
	cache->rq_size = 0;

	/* Return with batch mapping (cmd) still pinned */
	return 0;

err_request:
	/* An allocated request is added here, then the unwind continues. */
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(batch);
err_unmap:
	i915_gem_object_unpin_map(obj);
	return err;
}
1151
1152static u32 *reloc_gpu(struct i915_execbuffer *eb,
1153		      struct i915_vma *vma,
1154		      unsigned int len)
1155{
1156	struct reloc_cache *cache = &eb->reloc_cache;
1157	u32 *cmd;
1158
1159	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1160		reloc_gpu_flush(cache);
1161
1162	if (unlikely(!cache->rq)) {
1163		int err;
1164
1165		/* If we need to copy for the cmdparser, we will stall anyway */
1166		if (eb_use_cmdparser(eb))
1167			return ERR_PTR(-EWOULDBLOCK);
1168
1169		if (!intel_engine_can_store_dword(eb->engine))
1170			return ERR_PTR(-ENODEV);
1171
1172		err = __reloc_gpu_alloc(eb, vma, len);
1173		if (unlikely(err))
1174			return ERR_PTR(err);
1175	}
1176
1177	cmd = cache->rq_cmd + cache->rq_size;
1178	cache->rq_size += len;
1179
1180	return cmd;
1181}
1182
/*
 * Write the relocated address of @target (plus the reloc delta) into @vma
 * at reloc->offset.
 *
 * When no CPU mapping is currently cached and either GPU relocations are
 * forced or @vma's reservation object still has unsignaled fences, the
 * write is emitted as MI_STORE_DWORD_IMM commands into the GPU relocation
 * batch. If that batch cannot be obtained, or in all other cases, the value
 * is written through a CPU mapping obtained from reloc_vaddr().
 *
 * Returns target->node.start | UPDATE on success, or a negative error code
 * (converted to u64) if the CPU mapping could not be obtained.
 */
static u64
relocate_entry(struct i915_vma *vma,
	       const struct drm_i915_gem_relocation_entry *reloc,
	       struct i915_execbuffer *eb,
	       const struct i915_vma *target)
{
	u64 offset = reloc->offset;
	u64 target_offset = relocation_target(reloc, target);
	bool wide = eb->reloc_cache.use_64bit_reloc;
	void *vaddr;

	if (!eb->reloc_cache.vaddr &&
	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
		const unsigned int gen = eb->reloc_cache.gen;
		unsigned int len;
		u32 *batch;
		u64 addr;

		/*
		 * Number of dwords emitted below: a 64bit relocation takes
		 * one 5-dword qword store when 8-byte aligned, otherwise two
		 * 4-dword stores; 32bit relocations take 4 dwords on gen4+
		 * and 3 before that.
		 */
		if (wide)
			len = offset & 7 ? 8 : 5;
		else if (gen >= 4)
			len = 4;
		else
			len = 3;

		batch = reloc_gpu(eb, vma, len);
		if (IS_ERR(batch))
			goto repeat; /* fall back to writing with the CPU */

		addr = gen8_canonical_addr(vma->node.start + offset);
		if (wide) {
			if (offset & 7) {
				/* Unaligned: store low and high dwords separately */
				*batch++ = MI_STORE_DWORD_IMM_GEN4;
				*batch++ = lower_32_bits(addr);
				*batch++ = upper_32_bits(addr);
				*batch++ = lower_32_bits(target_offset);

				addr = gen8_canonical_addr(addr + 4);

				*batch++ = MI_STORE_DWORD_IMM_GEN4;
				*batch++ = lower_32_bits(addr);
				*batch++ = upper_32_bits(addr);
				*batch++ = upper_32_bits(target_offset);
			} else {
				/* Aligned: single store of both dwords */
				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
				*batch++ = lower_32_bits(addr);
				*batch++ = upper_32_bits(addr);
				*batch++ = lower_32_bits(target_offset);
				*batch++ = upper_32_bits(target_offset);
			}
		} else if (gen >= 6) {
			*batch++ = MI_STORE_DWORD_IMM_GEN4;
			*batch++ = 0;
			*batch++ = addr;
			*batch++ = target_offset;
		} else if (gen >= 4) {
			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
			*batch++ = 0;
			*batch++ = addr;
			*batch++ = target_offset;
		} else {
			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
			*batch++ = addr;
			*batch++ = target_offset;
		}

		goto out;
	}

	/*
	 * CPU path. For 64bit relocations this runs twice: first for the low
	 * dword, then (with wide cleared) for the high dword 4 bytes on.
	 */
repeat:
	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	/* The low bits of reloc_cache.vaddr carry the clflush flags. */
	clflush_write32(vaddr + offset_in_page(offset),
			lower_32_bits(target_offset),
			eb->reloc_cache.vaddr);

	if (wide) {
		offset += sizeof(u32);
		target_offset >>= 32;
		wide = false;
		goto repeat;
	}

out:
	return target->node.start | UPDATE;
}
1272
1273static u64
1274eb_relocate_entry(struct i915_execbuffer *eb,
1275		  struct i915_vma *vma,
1276		  const struct drm_i915_gem_relocation_entry *reloc)
1277{
1278	struct i915_vma *target;
1279	int err;
1280
1281	/* we've already hold a reference to all valid objects */
1282	target = eb_get_vma(eb, reloc->target_handle);
1283	if (unlikely(!target))
1284		return -ENOENT;
1285
1286	/* Validate that the target is in a valid r/w GPU domain */
1287	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1288		DRM_DEBUG("reloc with multiple write domains: "
1289			  "target %d offset %d "
1290			  "read %08x write %08x",
1291			  reloc->target_handle,
1292			  (int) reloc->offset,
1293			  reloc->read_domains,
1294			  reloc->write_domain);
1295		return -EINVAL;
1296	}
1297	if (unlikely((reloc->write_domain | reloc->read_domains)
1298		     & ~I915_GEM_GPU_DOMAINS)) {
1299		DRM_DEBUG("reloc with read/write non-GPU domains: "
1300			  "target %d offset %d "
1301			  "read %08x write %08x",
1302			  reloc->target_handle,
1303			  (int) reloc->offset,
1304			  reloc->read_domains,
1305			  reloc->write_domain);
1306		return -EINVAL;
1307	}
1308
1309	if (reloc->write_domain) {
1310		*target->exec_flags |= EXEC_OBJECT_WRITE;
1311
1312		/*
1313		 * Sandybridge PPGTT errata: We need a global gtt mapping
1314		 * for MI and pipe_control writes because the gpu doesn't
1315		 * properly redirect them through the ppgtt for non_secure
1316		 * batchbuffers.
1317		 */
1318		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1319		    IS_GEN6(eb->i915)) {
1320			err = i915_vma_bind(target, target->obj->cache_level,
1321					    PIN_GLOBAL);
1322			if (WARN_ONCE(err,
1323				      "Unexpected failure to bind target VMA!"))
1324				return err;
1325		}
1326	}
1327
1328	/*
1329	 * If the relocation already has the right value in it, no
1330	 * more work needs to be done.
1331	 */
1332	if (!DBG_FORCE_RELOC &&
1333	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1334		return 0;
1335
1336	/* Check that the relocation address is valid... */
1337	if (unlikely(reloc->offset >
1338		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1339		DRM_DEBUG("Relocation beyond object bounds: "
1340			  "target %d offset %d size %d.\n",
1341			  reloc->target_handle,
1342			  (int)reloc->offset,
1343			  (int)vma->size);
1344		return -EINVAL;
1345	}
1346	if (unlikely(reloc->offset & 3)) {
1347		DRM_DEBUG("Relocation not 4-byte aligned: "
1348			  "target %d offset %d.\n",
1349			  reloc->target_handle,
1350			  (int)reloc->offset);
1351		return -EINVAL;
1352	}
1353
1354	/*
1355	 * If we write into the object, we need to force the synchronisation
1356	 * barrier, either with an asynchronous clflush or if we executed the
1357	 * patching using the GPU (though that should be serialised by the
1358	 * timeline). To be completely sure, and since we are required to
1359	 * do relocations we are already stalling, disable the user's opt
1360	 * out of our synchronisation.
1361	 */
1362	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
1363
1364	/* and update the user's relocation entry */
1365	return relocate_entry(vma, reloc, eb, target);
1366}
1367
/*
 * Fast path: process all relocations of @vma directly from the user's
 * relocation array, without taking page faults.
 *
 * The array is copied in chunks into an on-stack buffer (512 bytes worth
 * of entries) with pagefaults disabled; if a chunk cannot be copied
 * without faulting, -EFAULT is returned. Each entry is applied with
 * eb_relocate_entry() and, where the target moved, the new presumed_offset
 * is written back to userspace.
 *
 * Returns 0 on success or a negative error code. The reloc cache is always
 * reset before returning from the processing loop.
 */
static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
{
/* Number of relocation entries that fit in x bytes */
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *urelocs;
	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
	/* Entries left to process; also carries a negative errno on exit. */
	unsigned int remain;

	urelocs = u64_to_user_ptr(entry->relocs_ptr);
	remain = entry->relocation_count;
	if (unlikely(remain > N_RELOC(ULONG_MAX)))
		return -EINVAL;

	/*
	 * We must check that the entire relocation array is safe
	 * to read. However, if the array is not writable the user loses
	 * the updated relocation values.
	 */
	if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs))))
		return -EFAULT;

	do {
		struct drm_i915_gem_relocation_entry *r = stack;
		unsigned int count =
			min_t(unsigned int, remain, ARRAY_SIZE(stack));
		unsigned int copied;

		/*
		 * This is the fast path and we cannot handle a pagefault
		 * whilst holding the struct mutex lest the user pass in the
		 * relocations contained within a mmaped bo. For in such a case
		 * we, the page fault handler would call i915_gem_fault() and
		 * we would try to acquire the struct mutex again. Obviously
		 * this is bad and so lockdep complains vehemently.
		 */
		pagefault_disable();
		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
		pagefault_enable();
		/* Non-zero means some bytes could not be copied. */
		if (unlikely(copied)) {
			remain = -EFAULT;
			goto out;
		}

		remain -= count;
		do {
			u64 offset = eb_relocate_entry(eb, vma, r);

			if (likely(offset == 0)) {
				/* Presumed offset was already correct. */
			} else if ((s64)offset < 0) {
				/* Negative errno encoded in the u64. */
				remain = (int)offset;
				goto out;
			} else {
				/*
				 * Note that reporting an error now
				 * leaves everything in an inconsistent
				 * state as we have *already* changed
				 * the relocation value inside the
				 * object. As we have not changed the
				 * reloc.presumed_offset or will not
				 * change the execobject.offset, on the
				 * call we may not rewrite the value
				 * inside the object, leaving it
				 * dangling and causing a GPU hang. Unless
				 * userspace dynamically rebuilds the
				 * relocations on each execbuf rather than
				 * presume a static tree.
				 *
				 * The access_ok() check above only covered
				 * reading the array (VERIFY_READ), so this
				 * write may fail if the array is not
				 * writable; the result of __put_user() is
				 * not checked and the user then simply
				 * loses the updated value.
				 */
				offset = gen8_canonical_addr(offset & ~UPDATE);
				__put_user(offset,
					   &urelocs[r-stack].presumed_offset);
			}
		} while (r++, --count);
		/*
		 * Advance by a full chunk; a short chunk can only be the
		 * last one, after which remain is 0 and the loop ends.
		 */
		urelocs += ARRAY_SIZE(stack);
	} while (remain);
out:
	reloc_cache_reset(&eb->reloc_cache);
	/* Either 0 or the negative errno stored above. */
	return remain;
}
1452
1453static int
1454eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 
 
 
 
 
 
1455{
1456	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1457	struct drm_i915_gem_relocation_entry *relocs =
1458		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1459	unsigned int i;
1460	int err;
1461
1462	for (i = 0; i < entry->relocation_count; i++) {
1463		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
1464
1465		if ((s64)offset < 0) {
1466			err = (int)offset;
1467			goto err;
1468		}
1469	}
1470	err = 0;
1471err:
1472	reloc_cache_reset(&eb->reloc_cache);
1473	return err;
1474}
1475
1476static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1477{
1478	const char __user *addr, *end;
1479	unsigned long size;
1480	char __maybe_unused c;
1481
1482	size = entry->relocation_count;
1483	if (size == 0)
1484		return 0;
1485
1486	if (size > N_RELOC(ULONG_MAX))
1487		return -EINVAL;
1488
1489	addr = u64_to_user_ptr(entry->relocs_ptr);
1490	size *= sizeof(struct drm_i915_gem_relocation_entry);
1491	if (!access_ok(VERIFY_READ, addr, size))
1492		return -EFAULT;
1493
1494	end = addr + size;
1495	for (; addr < end; addr += PAGE_SIZE) {
1496		int err = __get_user(c, addr);
1497		if (err)
1498			return err;
1499	}
1500	return __get_user(c, end - 1);
1501}
1502
1503static int eb_copy_relocations(const struct i915_execbuffer *eb)
1504{
1505	const unsigned int count = eb->buffer_count;
1506	unsigned int i;
1507	int err;
1508
 
1509	for (i = 0; i < count; i++) {
1510		const unsigned int nreloc = eb->exec[i].relocation_count;
1511		struct drm_i915_gem_relocation_entry __user *urelocs;
1512		struct drm_i915_gem_relocation_entry *relocs;
1513		unsigned long size;
1514		unsigned long copied;
1515
1516		if (nreloc == 0)
1517			continue;
1518
1519		err = check_relocations(&eb->exec[i]);
1520		if (err)
1521			goto err;
1522
1523		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1524		size = nreloc * sizeof(*relocs);
1525
1526		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1527		if (!relocs) {
1528			kvfree(relocs);
1529			err = -ENOMEM;
1530			goto err;
1531		}
1532
1533		/* copy_from_user is limited to < 4GiB */
1534		copied = 0;
1535		do {
1536			unsigned int len =
1537				min_t(u64, BIT_ULL(31), size - copied);
1538
1539			if (__copy_from_user((char *)relocs + copied,
1540					     (char __user *)urelocs + copied,
1541					     len)) {
1542				kvfree(relocs);
1543				err = -EFAULT;
1544				goto err;
1545			}
1546
1547			copied += len;
1548		} while (copied < size);
1549
1550		/*
1551		 * As we do not update the known relocation offsets after
1552		 * relocating (due to the complexities in lock handling),
1553		 * we need to mark them as invalid now so that we force the
1554		 * relocation processing next time. Just in case the target
1555		 * object is evicted and then rebound into its old
1556		 * presumed_offset before the next execbuffer - if that
1557		 * happened we would make the mistake of assuming that the
1558		 * relocations were valid.
1559		 */
1560		user_access_begin();
1561		for (copied = 0; copied < nreloc; copied++)
1562			unsafe_put_user(-1,
1563					&urelocs[copied].presumed_offset,
1564					end_user);
1565end_user:
1566		user_access_end();
1567
1568		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1569	}
1570
1571	return 0;
1572
1573err:
1574	while (i--) {
1575		struct drm_i915_gem_relocation_entry *relocs =
1576			u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1577		if (eb->exec[i].relocation_count)
1578			kvfree(relocs);
1579	}
1580	return err;
1581}
1582
1583static int eb_prefault_relocations(const struct i915_execbuffer *eb)
1584{
1585	const unsigned int count = eb->buffer_count;
1586	unsigned int i;
1587
1588	if (unlikely(i915_modparams.prefault_disable))
1589		return 0;
1590
 
 
1591	for (i = 0; i < count; i++) {
1592		int err;
1593
1594		err = check_relocations(&eb->exec[i]);
1595		if (err)
1596			return err;
1597	}
1598
1599	return 0;
1600}
1601
1602static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
1603{
1604	struct drm_device *dev = &eb->i915->drm;
1605	bool have_copy = false;
1606	struct i915_vma *vma;
1607	int err = 0;
1608
1609repeat:
1610	if (signal_pending(current)) {
1611		err = -ERESTARTSYS;
1612		goto out;
1613	}
1614
1615	/* We may process another execbuffer during the unlock... */
1616	eb_reset_vmas(eb);
1617	mutex_unlock(&dev->struct_mutex);
1618
1619	/*
1620	 * We take 3 passes through the slowpatch.
1621	 *
1622	 * 1 - we try to just prefault all the user relocation entries and
1623	 * then attempt to reuse the atomic pagefault disabled fast path again.
1624	 *
1625	 * 2 - we copy the user entries to a local buffer here outside of the
1626	 * local and allow ourselves to wait upon any rendering before
1627	 * relocations
1628	 *
1629	 * 3 - we already have a local copy of the relocation entries, but
1630	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1631	 */
1632	if (!err) {
1633		err = eb_prefault_relocations(eb);
1634	} else if (!have_copy) {
1635		err = eb_copy_relocations(eb);
1636		have_copy = err == 0;
1637	} else {
1638		cond_resched();
1639		err = 0;
1640	}
1641	if (err) {
1642		mutex_lock(&dev->struct_mutex);
1643		goto out;
1644	}
1645
1646	/* A frequent cause for EAGAIN are currently unavailable client pages */
1647	flush_workqueue(eb->i915->mm.userptr_wq);
1648
1649	err = i915_mutex_lock_interruptible(dev);
1650	if (err) {
1651		mutex_lock(&dev->struct_mutex);
1652		goto out;
1653	}
1654
1655	/* reacquire the objects */
1656	err = eb_lookup_vmas(eb);
1657	if (err)
1658		goto err;
1659
1660	GEM_BUG_ON(!eb->batch);
1661
1662	list_for_each_entry(vma, &eb->relocs, reloc_link) {
1663		if (!have_copy) {
1664			pagefault_disable();
1665			err = eb_relocate_vma(eb, vma);
1666			pagefault_enable();
1667			if (err)
1668				goto repeat;
1669		} else {
1670			err = eb_relocate_vma_slow(eb, vma);
1671			if (err)
1672				goto err;
1673		}
1674	}
1675
1676	/*
1677	 * Leave the user relocations as are, this is the painfully slow path,
1678	 * and we want to avoid the complication of dropping the lock whilst
1679	 * having buffers reserved in the aperture and so causing spurious
1680	 * ENOSPC for random operations.
1681	 */
1682
1683err:
1684	if (err == -EAGAIN)
1685		goto repeat;
1686
1687out:
1688	if (have_copy) {
1689		const unsigned int count = eb->buffer_count;
1690		unsigned int i;
1691
1692		for (i = 0; i < count; i++) {
1693			const struct drm_i915_gem_exec_object2 *entry =
1694				&eb->exec[i];
1695			struct drm_i915_gem_relocation_entry *relocs;
1696
1697			if (!entry->relocation_count)
1698				continue;
1699
1700			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1701			kvfree(relocs);
1702		}
1703	}
1704
1705	return err;
1706}
1707
1708static int eb_relocate(struct i915_execbuffer *eb)
 
 
 
 
1709{
1710	if (eb_lookup_vmas(eb))
1711		goto slow;
 
 
 
1712
1713	/* The objects are in their final locations, apply the relocations. */
1714	if (eb->args->flags & __EXEC_HAS_RELOC) {
1715		struct i915_vma *vma;
1716
1717		list_for_each_entry(vma, &eb->relocs, reloc_link) {
1718			if (eb_relocate_vma(eb, vma))
1719				goto slow;
1720		}
 
 
 
 
 
1721	}
1722
1723	return 0;
1724
1725slow:
1726	return eb_relocate_slow(eb);
1727}
1728
1729static void eb_export_fence(struct i915_vma *vma,
1730			    struct i915_request *rq,
1731			    unsigned int flags)
1732{
1733	struct reservation_object *resv = vma->resv;
 
 
1734
1735	/*
1736	 * Ignore errors from failing to allocate the new fence, we can't
1737	 * handle an error right now. Worst case should be missed
1738	 * synchronisation leading to rendering corruption.
1739	 */
1740	reservation_object_lock(resv, NULL);
1741	if (flags & EXEC_OBJECT_WRITE)
1742		reservation_object_add_excl_fence(resv, &rq->fence);
1743	else if (reservation_object_reserve_shared(resv) == 0)
1744		reservation_object_add_shared_fence(resv, &rq->fence);
1745	reservation_object_unlock(resv);
1746}
1747
1748static int eb_move_to_gpu(struct i915_execbuffer *eb)
1749{
1750	const unsigned int count = eb->buffer_count;
1751	unsigned int i;
1752	int err;
1753
1754	for (i = 0; i < count; i++) {
1755		unsigned int flags = eb->flags[i];
1756		struct i915_vma *vma = eb->vma[i];
1757		struct drm_i915_gem_object *obj = vma->obj;
1758
1759		if (flags & EXEC_OBJECT_CAPTURE) {
1760			struct i915_capture_list *capture;
1761
1762			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1763			if (unlikely(!capture))
1764				return -ENOMEM;
1765
1766			capture->next = eb->request->capture_list;
1767			capture->vma = eb->vma[i];
1768			eb->request->capture_list = capture;
1769		}
1770
1771		/*
1772		 * If the GPU is not _reading_ through the CPU cache, we need
1773		 * to make sure that any writes (both previous GPU writes from
1774		 * before a change in snooping levels and normal CPU writes)
1775		 * caught in that cache are flushed to main memory.
1776		 *
1777		 * We want to say
1778		 *   obj->cache_dirty &&
1779		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1780		 * but gcc's optimiser doesn't handle that as well and emits
1781		 * two jumps instead of one. Maybe one day...
1782		 */
1783		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1784			if (i915_gem_clflush_object(obj, 0))
1785				flags &= ~EXEC_OBJECT_ASYNC;
1786		}
1787
1788		if (flags & EXEC_OBJECT_ASYNC)
1789			continue;
1790
1791		err = i915_request_await_object
1792			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
1793		if (err)
1794			return err;
1795	}
1796
1797	for (i = 0; i < count; i++) {
1798		unsigned int flags = eb->flags[i];
1799		struct i915_vma *vma = eb->vma[i];
 
 
1800
1801		i915_vma_move_to_active(vma, eb->request, flags);
1802		eb_export_fence(vma, eb->request, flags);
1803
1804		__eb_unreserve_vma(vma, flags);
1805		vma->exec_flags = NULL;
1806
1807		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1808			i915_vma_put(vma);
1809	}
1810	eb->exec = NULL;
1811
1812	/* Unconditionally flush any chipset caches (for streaming writes). */
1813	i915_gem_chipset_flush(eb->i915);
1814
1815	return 0;
 
1816}
1817
1818static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 
1819{
1820	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1821		return false;
1822
1823	/* Kernel clipping was a DRI1 misfeature */
1824	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1825		if (exec->num_cliprects || exec->cliprects_ptr)
1826			return false;
1827	}
1828
1829	if (exec->DR4 == 0xffffffff) {
1830		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1831		exec->DR4 = 0;
1832	}
1833	if (exec->DR1 || exec->DR4)
1834		return false;
1835
1836	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1837		return false;
1838
1839	return true;
1840}
1841
1842void i915_vma_move_to_active(struct i915_vma *vma,
1843			     struct i915_request *rq,
1844			     unsigned int flags)
1845{
1846	struct drm_i915_gem_object *obj = vma->obj;
1847	const unsigned int idx = rq->engine->id;
1848
1849	lockdep_assert_held(&rq->i915->drm.struct_mutex);
1850	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
1851
1852	/*
1853	 * Add a reference if we're newly entering the active list.
1854	 * The order in which we add operations to the retirement queue is
1855	 * vital here: mark_active adds to the start of the callback list,
1856	 * such that subsequent callbacks are called first. Therefore we
1857	 * add the active reference first and queue for it to be dropped
1858	 * *last*.
1859	 */
1860	if (!i915_vma_is_active(vma))
1861		obj->active_count++;
1862	i915_vma_set_active(vma, idx);
1863	i915_gem_active_set(&vma->last_read[idx], rq);
1864	list_move_tail(&vma->vm_link, &vma->vm->active_list);
1865
1866	obj->write_domain = 0;
1867	if (flags & EXEC_OBJECT_WRITE) {
1868		obj->write_domain = I915_GEM_DOMAIN_RENDER;
1869
1870		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
1871			i915_gem_active_set(&obj->frontbuffer_write, rq);
1872
1873		obj->read_domains = 0;
1874	}
1875	obj->read_domains |= I915_GEM_GPU_DOMAINS;
1876
1877	if (flags & EXEC_OBJECT_NEEDS_FENCE)
1878		i915_gem_active_set(&vma->last_fence, rq);
1879}
1880
1881static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
1882{
1883	u32 *cs;
1884	int i;
1885
1886	if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) {
1887		DRM_DEBUG("sol reset is gen7/rcs only\n");
1888		return -EINVAL;
 
 
 
 
1889	}
1890
1891	cs = intel_ring_begin(rq, 4 * 2 + 2);
1892	if (IS_ERR(cs))
1893		return PTR_ERR(cs);
1894
1895	*cs++ = MI_LOAD_REGISTER_IMM(4);
1896	for (i = 0; i < 4; i++) {
1897		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1898		*cs++ = 0;
1899	}
1900	*cs++ = MI_NOOP;
1901	intel_ring_advance(rq, cs);
1902
1903	return 0;
1904}
1905
1906static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
1907{
1908	struct drm_i915_gem_object *shadow_batch_obj;
1909	struct i915_vma *vma;
1910	int err;
1911
1912	shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
1913						   PAGE_ALIGN(eb->batch_len));
1914	if (IS_ERR(shadow_batch_obj))
1915		return ERR_CAST(shadow_batch_obj);
1916
1917	err = intel_engine_cmd_parser(eb->engine,
1918				      eb->batch->obj,
1919				      shadow_batch_obj,
1920				      eb->batch_start_offset,
1921				      eb->batch_len,
1922				      is_master);
1923	if (err) {
1924		if (err == -EACCES) /* unhandled chained batch */
1925			vma = NULL;
1926		else
1927			vma = ERR_PTR(err);
1928		goto out;
1929	}
1930
1931	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1932	if (IS_ERR(vma))
1933		goto out;
1934
1935	eb->vma[eb->buffer_count] = i915_vma_get(vma);
1936	eb->flags[eb->buffer_count] =
1937		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
1938	vma->exec_flags = &eb->flags[eb->buffer_count];
1939	eb->buffer_count++;
1940
1941out:
1942	i915_gem_object_unpin_pages(shadow_batch_obj);
1943	return vma;
1944}
1945
1946static void
1947add_to_client(struct i915_request *rq, struct drm_file *file)
1948{
1949	rq->file_priv = file->driver_priv;
1950	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
1951}
1952
1953static int eb_submit(struct i915_execbuffer *eb)
 
 
1954{
1955	int err;
 
 
1956
1957	err = eb_move_to_gpu(eb);
1958	if (err)
1959		return err;
1960
1961	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
1962		err = i915_reset_gen7_sol_offsets(eb->request);
1963		if (err)
1964			return err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1965	}
1966
1967	err = eb->engine->emit_bb_start(eb->request,
1968					eb->batch->node.start +
1969					eb->batch_start_offset,
1970					eb->batch_len,
1971					eb->batch_flags);
1972	if (err)
1973		return err;
1974
1975	return 0;
1976}
1977
1978/*
1979 * Find one BSD ring to dispatch the corresponding BSD command.
1980 * The engine index is returned.
1981 */
1982static unsigned int
1983gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1984			 struct drm_file *file)
1985{
1986	struct drm_i915_file_private *file_priv = file->driver_priv;
1987
1988	/* Check whether the file_priv has already selected one ring. */
1989	if ((int)file_priv->bsd_engine < 0)
1990		file_priv->bsd_engine = atomic_fetch_xor(1,
1991			 &dev_priv->mm.bsd_engine_dispatch_index);
1992
1993	return file_priv->bsd_engine;
1994}
1995
1996#define I915_USER_RINGS (4)
 
 
 
 
1997
1998static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1999	[I915_EXEC_DEFAULT]	= RCS,
2000	[I915_EXEC_RENDER]	= RCS,
2001	[I915_EXEC_BLT]		= BCS,
2002	[I915_EXEC_BSD]		= VCS,
2003	[I915_EXEC_VEBOX]	= VECS
2004};
2005
2006static struct intel_engine_cs *
2007eb_select_engine(struct drm_i915_private *dev_priv,
2008		 struct drm_file *file,
2009		 struct drm_i915_gem_execbuffer2 *args)
2010{
2011	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2012	struct intel_engine_cs *engine;
2013
2014	if (user_ring_id > I915_USER_RINGS) {
2015		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2016		return NULL;
2017	}
2018
2019	if ((user_ring_id != I915_EXEC_BSD) &&
2020	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
2021		DRM_DEBUG("execbuf with non bsd ring but with invalid "
2022			  "bsd dispatch flags: %d\n", (int)(args->flags));
2023		return NULL;
2024	}
2025
2026	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
2027		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2028
2029		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2030			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
2031		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2032			   bsd_idx <= I915_EXEC_BSD_RING2) {
2033			bsd_idx >>= I915_EXEC_BSD_SHIFT;
2034			bsd_idx--;
2035		} else {
2036			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2037				  bsd_idx);
2038			return NULL;
2039		}
2040
2041		engine = dev_priv->engine[_VCS(bsd_idx)];
2042	} else {
2043		engine = dev_priv->engine[user_ring_map[user_ring_id]];
2044	}
 
 
 
 
2045
2046	if (!engine) {
2047		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
2048		return NULL;
2049	}
2050
2051	return engine;
2052}
2053
/*
 * Drop the syncobj reference held by each of the first @n entries (last
 * entry first), masking off the two flag bits packed into each pointer,
 * then free the array itself.
 */
static void
__free_fence_array(struct drm_syncobj **fences, unsigned int n)
{
	unsigned int i = n;

	while (i > 0) {
		i--;
		drm_syncobj_put(ptr_mask_bits(fences[i], 2));
	}

	kvfree(fences);
}
2061
2062static struct drm_syncobj **
2063get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2064		struct drm_file *file)
2065{
2066	const unsigned long nfences = args->num_cliprects;
2067	struct drm_i915_gem_exec_fence __user *user;
2068	struct drm_syncobj **fences;
2069	unsigned long n;
2070	int err;
2071
2072	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2073		return NULL;
2074
2075	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
2076	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2077	if (nfences > min_t(unsigned long,
2078			    ULONG_MAX / sizeof(*user),
2079			    SIZE_MAX / sizeof(*fences)))
2080		return ERR_PTR(-EINVAL);
2081
2082	user = u64_to_user_ptr(args->cliprects_ptr);
2083	if (!access_ok(VERIFY_READ, user, nfences * sizeof(*user)))
2084		return ERR_PTR(-EFAULT);
2085
2086	fences = kvmalloc_array(nfences, sizeof(*fences),
2087				__GFP_NOWARN | GFP_KERNEL);
2088	if (!fences)
2089		return ERR_PTR(-ENOMEM);
2090
2091	for (n = 0; n < nfences; n++) {
2092		struct drm_i915_gem_exec_fence fence;
2093		struct drm_syncobj *syncobj;
2094
2095		if (__copy_from_user(&fence, user++, sizeof(fence))) {
2096			err = -EFAULT;
2097			goto err;
2098		}
2099
2100		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2101			err = -EINVAL;
2102			goto err;
2103		}
2104
2105		syncobj = drm_syncobj_find(file, fence.handle);
2106		if (!syncobj) {
2107			DRM_DEBUG("Invalid syncobj handle provided\n");
2108			err = -ENOENT;
2109			goto err;
 
 
2110		}
2111
2112		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2113			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2114
2115		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2116	}
2117
2118	return fences;
2119
2120err:
2121	__free_fence_array(fences, n);
2122	return ERR_PTR(err);
2123}
2124
2125static void
2126put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2127		struct drm_syncobj **fences)
 
2128{
2129	if (fences)
2130		__free_fence_array(fences, args->num_cliprects);
2131}
2132
2133static int
2134await_fence_array(struct i915_execbuffer *eb,
2135		  struct drm_syncobj **fences)
2136{
2137	const unsigned int nfences = eb->args->num_cliprects;
2138	unsigned int n;
2139	int err;
2140
2141	for (n = 0; n < nfences; n++) {
2142		struct drm_syncobj *syncobj;
2143		struct dma_fence *fence;
2144		unsigned int flags;
2145
2146		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2147		if (!(flags & I915_EXEC_FENCE_WAIT))
2148			continue;
2149
2150		fence = drm_syncobj_fence_get(syncobj);
2151		if (!fence)
2152			return -EINVAL;
2153
2154		err = i915_request_await_dma_fence(eb->request, fence);
2155		dma_fence_put(fence);
2156		if (err < 0)
2157			return err;
2158	}
2159
2160	return 0;
2161}
2162
2163static void
2164signal_fence_array(struct i915_execbuffer *eb,
2165		   struct drm_syncobj **fences)
2166{
2167	const unsigned int nfences = eb->args->num_cliprects;
2168	struct dma_fence * const fence = &eb->request->fence;
2169	unsigned int n;
2170
2171	for (n = 0; n < nfences; n++) {
2172		struct drm_syncobj *syncobj;
2173		unsigned int flags;
2174
2175		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2176		if (!(flags & I915_EXEC_FENCE_SIGNAL))
2177			continue;
2178
2179		drm_syncobj_replace_fence(syncobj, fence);
2180	}
2181}
2182
2183static int
2184i915_gem_do_execbuffer(struct drm_device *dev,
2185		       struct drm_file *file,
2186		       struct drm_i915_gem_execbuffer2 *args,
2187		       struct drm_i915_gem_exec_object2 *exec,
2188		       struct drm_syncobj **fences)
2189{
2190	struct i915_execbuffer eb;
2191	struct dma_fence *in_fence = NULL;
2192	struct sync_file *out_fence = NULL;
2193	int out_fence_fd = -1;
2194	int err;
2195
2196	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
2197	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
2198		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);
2199
2200	eb.i915 = to_i915(dev);
2201	eb.file = file;
2202	eb.args = args;
2203	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
2204		args->flags |= __EXEC_HAS_RELOC;
2205
2206	eb.exec = exec;
2207	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
2208	eb.vma[0] = NULL;
2209	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);
2210
2211	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
2212	if (USES_FULL_PPGTT(eb.i915))
2213		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
2214	reloc_cache_init(&eb.reloc_cache, eb.i915);
2215
2216	eb.buffer_count = args->buffer_count;
2217	eb.batch_start_offset = args->batch_start_offset;
2218	eb.batch_len = args->batch_len;
2219
2220	eb.batch_flags = 0;
2221	if (args->flags & I915_EXEC_SECURE) {
2222		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
2223		    return -EPERM;
2224
2225		eb.batch_flags |= I915_DISPATCH_SECURE;
 
 
2226	}
2227	if (args->flags & I915_EXEC_IS_PINNED)
2228		eb.batch_flags |= I915_DISPATCH_PINNED;
2229
2230	eb.engine = eb_select_engine(eb.i915, file, args);
2231	if (!eb.engine)
2232		return -EINVAL;
2233
2234	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
2235		if (!HAS_RESOURCE_STREAMER(eb.i915)) {
2236			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
 
 
 
 
 
 
 
 
 
2237			return -EINVAL;
2238		}
2239		if (eb.engine->id != RCS) {
2240			DRM_DEBUG("RS is not available on %s\n",
2241				 eb.engine->name);
 
 
2242			return -EINVAL;
2243		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2244
2245		eb.batch_flags |= I915_DISPATCH_RS;
 
 
2246	}
2247
2248	if (args->flags & I915_EXEC_FENCE_IN) {
2249		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
2250		if (!in_fence)
2251			return -EINVAL;
2252	}
 
 
 
 
 
 
 
2253
2254	if (args->flags & I915_EXEC_FENCE_OUT) {
2255		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
2256		if (out_fence_fd < 0) {
2257			err = out_fence_fd;
2258			goto err_in_fence;
 
2259		}
2260	}
2261
2262	err = eb_create(&eb);
2263	if (err)
2264		goto err_out_fence;
2265
2266	GEM_BUG_ON(!eb.lut_size);
2267
2268	err = eb_select_context(&eb);
2269	if (unlikely(err))
2270		goto err_destroy;
2271
2272	/*
2273	 * Take a local wakeref for preparing to dispatch the execbuf as
2274	 * we expect to access the hardware fairly frequently in the
2275	 * process. Upon first dispatch, we acquire another prolonged
2276	 * wakeref that we hold until the GPU has been idle for at least
2277	 * 100ms.
2278	 */
2279	intel_runtime_pm_get(eb.i915);
2280
2281	err = i915_mutex_lock_interruptible(dev);
2282	if (err)
2283		goto err_rpm;
2284
2285	err = eb_relocate(&eb);
2286	if (err) {
2287		/*
2288		 * If the user expects the execobject.offset and
2289		 * reloc.presumed_offset to be an exact match,
2290		 * as for using NO_RELOC, then we cannot update
2291		 * the execobject.offset until we have completed
2292		 * relocation.
2293		 */
2294		args->flags &= ~__EXEC_HAS_RELOC;
2295		goto err_vma;
2296	}
2297
2298	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
2299		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
2300		err = -EINVAL;
2301		goto err_vma;
2302	}
2303	if (eb.batch_start_offset > eb.batch->size ||
2304	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
2305		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
2306		err = -EINVAL;
2307		goto err_vma;
2308	}
2309
2310	if (eb_use_cmdparser(&eb)) {
2311		struct i915_vma *vma;
 
 
2312
2313		vma = eb_parse(&eb, drm_is_current_master(file));
2314		if (IS_ERR(vma)) {
2315			err = PTR_ERR(vma);
2316			goto err_vma;
 
 
 
 
2317		}
2318
2319		if (vma) {
2320			/*
2321			 * Batch parsed and accepted:
2322			 *
2323			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
2324			 * bit from MI_BATCH_BUFFER_START commands issued in
2325			 * the dispatch_execbuffer implementations. We
2326			 * specifically don't want that set on batches the
2327			 * command parser has accepted.
2328			 */
2329			eb.batch_flags |= I915_DISPATCH_SECURE;
2330			eb.batch_start_offset = 0;
2331			eb.batch = vma;
2332		}
 
 
 
 
 
2333	}
2334
2335	if (eb.batch_len == 0)
2336		eb.batch_len = eb.batch->size - eb.batch_start_offset;
 
 
 
 
 
 
 
2337
2338	/*
2339	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
2340	 * batch" bit. Hence we need to pin secure batches into the global gtt.
2341	 * hsw should have this fixed, but bdw mucks it up again. */
2342	if (eb.batch_flags & I915_DISPATCH_SECURE) {
2343		struct i915_vma *vma;
2344
2345		/*
2346		 * So on first glance it looks freaky that we pin the batch here
2347		 * outside of the reservation loop. But:
2348		 * - The batch is already pinned into the relevant ppgtt, so we
2349		 *   already have the backing storage fully allocated.
2350		 * - No other BO uses the global gtt (well contexts, but meh),
2351		 *   so we don't really have issues with multiple objects not
2352		 *   fitting due to fragmentation.
2353		 * So this is actually safe.
2354		 */
2355		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
2356		if (IS_ERR(vma)) {
2357			err = PTR_ERR(vma);
2358			goto err_vma;
2359		}
 
 
 
2360
2361		eb.batch = vma;
 
 
 
 
2362	}
 
 
 
 
 
2363
2364	/* All GPU relocation batches must be submitted prior to the user rq */
2365	GEM_BUG_ON(eb.reloc_cache.rq);
 
 
 
 
 
 
 
 
2366
2367	/* Allocate a request for this batch buffer nice and early. */
2368	eb.request = i915_request_alloc(eb.engine, eb.ctx);
2369	if (IS_ERR(eb.request)) {
2370		err = PTR_ERR(eb.request);
2371		goto err_batch_unpin;
2372	}
2373
2374	if (in_fence) {
2375		err = i915_request_await_dma_fence(eb.request, in_fence);
2376		if (err < 0)
2377			goto err_request;
2378	}
2379
2380	if (fences) {
2381		err = await_fence_array(&eb, fences);
2382		if (err)
2383			goto err_request;
2384	}
 
 
 
2385
2386	if (out_fence_fd != -1) {
2387		out_fence = sync_file_create(&eb.request->fence);
2388		if (!out_fence) {
2389			err = -ENOMEM;
2390			goto err_request;
2391		}
 
 
 
 
2392	}
2393
2394	/*
2395	 * Whilst this request exists, batch_obj will be on the
2396	 * active_list, and so will hold the active reference. Only when this
2397	 * request is retired will the the batch_obj be moved onto the
2398	 * inactive_list and lose its active reference. Hence we do not need
2399	 * to explicitly hold another reference here.
2400	 */
2401	eb.request->batch = eb.batch;
2402
2403	trace_i915_request_queue(eb.request, eb.batch_flags);
2404	err = eb_submit(&eb);
2405err_request:
2406	__i915_request_add(eb.request, err == 0);
2407	add_to_client(eb.request, file);
2408
2409	if (fences)
2410		signal_fence_array(&eb, fences);
2411
2412	if (out_fence) {
2413		if (err == 0) {
2414			fd_install(out_fence_fd, out_fence->file);
2415			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
2416			args->rsvd2 |= (u64)out_fence_fd << 32;
2417			out_fence_fd = -1;
2418		} else {
2419			fput(out_fence->file);
2420		}
2421	}
2422
2423err_batch_unpin:
2424	if (eb.batch_flags & I915_DISPATCH_SECURE)
2425		i915_vma_unpin(eb.batch);
2426err_vma:
2427	if (eb.exec)
2428		eb_release_vmas(&eb);
2429	mutex_unlock(&dev->struct_mutex);
2430err_rpm:
2431	intel_runtime_pm_put(eb.i915);
2432	i915_gem_context_put(eb.ctx);
2433err_destroy:
2434	eb_destroy(&eb);
2435err_out_fence:
2436	if (out_fence_fd != -1)
2437		put_unused_fd(out_fence_fd);
2438err_in_fence:
2439	dma_fence_put(in_fence);
2440	return err;
2441}
2442
2443static size_t eb_element_size(void)
2444{
2445	return (sizeof(struct drm_i915_gem_exec_object2) +
2446		sizeof(struct i915_vma *) +
2447		sizeof(unsigned int));
2448}
2449
/*
 * Return true when @count is at least 1, at most INT_MAX, and small enough
 * that count + 1 elements of eb_element_size() bytes fit in a size_t.
 */
static bool check_buffer_count(size_t count)
{
	const size_t sz = eb_element_size();

	/*
	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
	 * array size (see eb_create()). Otherwise, we can accept an array as
	 * large as can be addressed (though use large arrays at your peril)!
	 */
	if (count < 1)
		return false;

	if (count > INT_MAX)
		return false;

	return count <= SIZE_MAX / sz - 1;
}
2462
2463/*
2464 * Legacy execbuffer just creates an exec2 list from the original exec object
2465 * list array and passes it to the real function.
2466 */
2467int
2468i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
2469			  struct drm_file *file)
2470{
2471	struct drm_i915_gem_execbuffer *args = data;
2472	struct drm_i915_gem_execbuffer2 exec2;
2473	struct drm_i915_gem_exec_object *exec_list = NULL;
2474	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2475	const size_t count = args->buffer_count;
2476	unsigned int i;
2477	int err;
2478
2479	if (!check_buffer_count(count)) {
2480		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2481		return -EINVAL;
2482	}
2483
2484	exec2.buffers_ptr = args->buffers_ptr;
2485	exec2.buffer_count = args->buffer_count;
2486	exec2.batch_start_offset = args->batch_start_offset;
2487	exec2.batch_len = args->batch_len;
2488	exec2.DR1 = args->DR1;
2489	exec2.DR4 = args->DR4;
2490	exec2.num_cliprects = args->num_cliprects;
2491	exec2.cliprects_ptr = args->cliprects_ptr;
2492	exec2.flags = I915_EXEC_RENDER;
2493	i915_execbuffer2_set_context_id(exec2, 0);
2494
2495	if (!i915_gem_check_execbuffer(&exec2))
2496		return -EINVAL;
2497
2498	/* Copy in the exec list from userland */
2499	exec_list = kvmalloc_array(count, sizeof(*exec_list),
2500				   __GFP_NOWARN | GFP_KERNEL);
2501	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2502				    __GFP_NOWARN | GFP_KERNEL);
2503	if (exec_list == NULL || exec2_list == NULL) {
2504		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2505			  args->buffer_count);
2506		kvfree(exec_list);
2507		kvfree(exec2_list);
2508		return -ENOMEM;
2509	}
2510	err = copy_from_user(exec_list,
2511			     u64_to_user_ptr(args->buffers_ptr),
2512			     sizeof(*exec_list) * count);
2513	if (err) {
2514		DRM_DEBUG("copy %d exec entries failed %d\n",
2515			  args->buffer_count, err);
2516		kvfree(exec_list);
2517		kvfree(exec2_list);
 
2518		return -EFAULT;
2519	}
2520
2521	for (i = 0; i < args->buffer_count; i++) {
2522		exec2_list[i].handle = exec_list[i].handle;
2523		exec2_list[i].relocation_count = exec_list[i].relocation_count;
2524		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2525		exec2_list[i].alignment = exec_list[i].alignment;
2526		exec2_list[i].offset = exec_list[i].offset;
2527		if (INTEL_GEN(to_i915(dev)) < 4)
2528			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2529		else
2530			exec2_list[i].flags = 0;
2531	}
2532
2533	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2534	if (exec2.flags & __EXEC_HAS_RELOC) {
2535		struct drm_i915_gem_exec_object __user *user_exec_list =
2536			u64_to_user_ptr(args->buffers_ptr);
 
 
 
 
 
2537
 
 
2538		/* Copy the new buffer offsets back to the user's exec list. */
2539		for (i = 0; i < args->buffer_count; i++) {
2540			if (!(exec2_list[i].offset & UPDATE))
2541				continue;
2542
2543			exec2_list[i].offset =
2544				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2545			exec2_list[i].offset &= PIN_OFFSET_MASK;
2546			if (__copy_to_user(&user_exec_list[i].offset,
2547					   &exec2_list[i].offset,
2548					   sizeof(user_exec_list[i].offset)))
2549				break;
 
2550		}
2551	}
2552
2553	kvfree(exec_list);
2554	kvfree(exec2_list);
2555	return err;
2556}
2557
2558int
2559i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
2560			   struct drm_file *file)
2561{
2562	struct drm_i915_gem_execbuffer2 *args = data;
2563	struct drm_i915_gem_exec_object2 *exec2_list;
2564	struct drm_syncobj **fences = NULL;
2565	const size_t count = args->buffer_count;
2566	int err;
2567
2568	if (!check_buffer_count(count)) {
2569		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2570		return -EINVAL;
2571	}
2572
2573	if (!i915_gem_check_execbuffer(args))
2574		return -EINVAL;
2575
2576	/* Allocate an extra slot for use by the command parser */
2577	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2578				    __GFP_NOWARN | GFP_KERNEL);
2579	if (exec2_list == NULL) {
2580		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
2581			  count);
2582		return -ENOMEM;
2583	}
2584	if (copy_from_user(exec2_list,
2585			   u64_to_user_ptr(args->buffers_ptr),
2586			   sizeof(*exec2_list) * count)) {
2587		DRM_DEBUG("copy %zd exec entries failed\n", count);
2588		kvfree(exec2_list);
 
 
 
2589		return -EFAULT;
2590	}
2591
2592	if (args->flags & I915_EXEC_FENCE_ARRAY) {
2593		fences = get_fence_array(args, file);
2594		if (IS_ERR(fences)) {
2595			kvfree(exec2_list);
2596			return PTR_ERR(fences);
2597		}
2598	}
2599
2600	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2601
2602	/*
2603	 * Now that we have begun execution of the batchbuffer, we ignore
2604	 * any new error after this point. Also given that we have already
2605	 * updated the associated relocations, we try to write out the current
2606	 * object locations irrespective of any error.
2607	 */
2608	if (args->flags & __EXEC_HAS_RELOC) {
2609		struct drm_i915_gem_exec_object2 __user *user_exec_list =
2610			u64_to_user_ptr(args->buffers_ptr);
2611		unsigned int i;
2612
2613		/* Copy the new buffer offsets back to the user's exec list. */
2614		user_access_begin();
2615		for (i = 0; i < args->buffer_count; i++) {
2616			if (!(exec2_list[i].offset & UPDATE))
2617				continue;
2618
2619			exec2_list[i].offset =
2620				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2621			unsafe_put_user(exec2_list[i].offset,
2622					&user_exec_list[i].offset,
2623					end_user);
2624		}
2625end_user:
2626		user_access_end();
2627	}
2628
2629	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2630	put_fence_array(args, fences);
2631	kvfree(exec2_list);
2632	return err;
2633}