i915_gem_execbuffer.c - drivers/gpu/drm/i915/i915_gem_execbuffer.c - Linux diff v4.6

   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
 
 
 
 
 
  29#include <drm/drmP.h>
 
  30#include <drm/i915_drm.h>
 
  31#include "i915_drv.h"
 
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34#include <linux/dma_remapping.h>
  35#include <linux/uaccess.h>
  36
  37#define  __EXEC_OBJECT_HAS_PIN (1<<31)
  38#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
  39#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
  40#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  41
  42#define BATCH_OFFSET_BIAS (256*1024)
  43
/*
 * Per-execbuffer lookup state mapping user handles to their vmas.
 *
 * Allocated by eb_create() with a variable-length tail; which union member is
 * in use is encoded in the sign of @and.
 */
struct eb_vmas {
	/* All vmas of this execbuffer, linked through i915_vma.exec_list. */
	struct list_head vmas;
	/*
	 * Negative: holds -buffer_count and lut[] is indexed directly.
	 * Non-negative: bucket mask (bucket count - 1) used to hash into buckets[].
	 */
	int and;
	union {
		struct i915_vma *lut[0];
		struct hlist_head buckets[0];
	};
};
  52
  53static struct eb_vmas *
  54eb_create(struct drm_i915_gem_execbuffer2 *args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  55{
  56	struct eb_vmas *eb = NULL;
 
  57
  58	if (args->flags & I915_EXEC_HANDLE_LUT) {
  59		unsigned size = args->buffer_count;
  60		size *= sizeof(struct i915_vma *);
  61		size += sizeof(struct eb_vmas);
  62		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
  63	}
  64
  65	if (eb == NULL) {
  66		unsigned size = args->buffer_count;
  67		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
  68		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
  69		while (count > 2*size)
  70			count >>= 1;
  71		eb = kzalloc(count*sizeof(struct hlist_head) +
  72			     sizeof(struct eb_vmas),
  73			     GFP_TEMPORARY);
  74		if (eb == NULL)
  75			return eb;
  76
  77		eb->and = count - 1;
  78	} else
  79		eb->and = -args->buffer_count;
 
  80
  81	INIT_LIST_HEAD(&eb->vmas);
  82	return eb;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  83}
  84
  85static void
  86eb_reset(struct eb_vmas *eb)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  87{
  88	if (eb->and >= 0)
  89		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  90}
  91
/*
 * Resolve every handle in @exec to a vma in @vm and record it in @eb.
 *
 * Phase 1 (under file->table_lock): look up each handle, reject unknown or
 * duplicated objects, take a reference and queue the object on a local list.
 * Phase 2 (lock dropped): look up or create the vma for each queued object,
 * move it onto eb->vmas and enter it into the lut / hash table.
 *
 * Returns 0 on success, -ENOENT for an unknown handle, -EINVAL if an object
 * appears twice, or the error from the vma lookup. On error, objects still on
 * the local list are unreferenced here.
 */
static int
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
{
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret;

	INIT_LIST_HEAD(&objects);
	spin_lock(&file->table_lock);
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
	for (i = 0; i < args->buffer_count; i++) {
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			ret = -ENOENT;
			goto err;
		}

		/* A non-empty link means the object is already queued somewhere. */
		if (!list_empty(&obj->obj_exec_link)) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			ret = -EINVAL;
			goto err;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	spin_unlock(&file->table_lock);

	/* The local list preserves exec[] order, so i tracks the exec index. */
	i = 0;
	while (!list_empty(&objects)) {
		struct i915_vma *vma;

		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto err;
		}

		/* Transfer ownership from the objects list to the vmas list. */
		list_add_tail(&vma->exec_list, &eb->vmas);
		list_del_init(&obj->obj_exec_link);

		vma->exec_entry = &exec[i];
		if (eb->and < 0) {
			/* direct-lookup mode: slot index is the exec index */
			eb->lut[i] = vma;
		} else {
			/* hash mode: key is the exec index (HANDLE_LUT) or the handle */
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
		++i;
	}

	return 0;


err:
	/* Drop the references of objects that never made it onto eb->vmas. */
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		drm_gem_object_unreference(&obj->base);
	}
	/*
	 * Objects already transferred to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

	return ret;
}
 187
 188static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
 
 189{
 190	if (eb->and < 0) {
 191		if (handle >= -eb->and)
 192			return NULL;
 193		return eb->lut[handle];
 194	} else {
 195		struct hlist_head *head;
 196		struct i915_vma *vma;
 197
 198		head = &eb->buckets[handle & eb->and];
 199		hlist_for_each_entry(vma, head, exec_node) {
 200			if (vma->exec_handle == handle)
 201				return vma;
 202		}
 203		return NULL;
 204	}
 205}
 206
 207static void
 208i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
 209{
 210	struct drm_i915_gem_exec_object2 *entry;
 211	struct drm_i915_gem_object *obj = vma->obj;
 212
 213	if (!drm_mm_node_allocated(&vma->node))
 214		return;
 
 215
 216	entry = vma->exec_entry;
 
 217
 218	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
 219		i915_gem_object_unpin_fence(obj);
 
 220
 221	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
 222		vma->pin_count--;
 223
 224	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
 
 
 225}
 226
 227static void eb_destroy(struct eb_vmas *eb)
 228{
 229	while (!list_empty(&eb->vmas)) {
 230		struct i915_vma *vma;
 
 
 
 231
 232		vma = list_first_entry(&eb->vmas,
 233				       struct i915_vma,
 234				       exec_list);
 235		list_del_init(&vma->exec_list);
 236		i915_gem_execbuffer_unreserve_vma(vma);
 237		drm_gem_object_unreference(&vma->obj->base);
 238	}
 239	kfree(eb);
 240}
 241
 242static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
 
 
 243{
 244	return (HAS_LLC(obj->base.dev) ||
 245		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 246		obj->cache_level != I915_CACHE_NONE);
 247}
 248
 249/* Used to convert any address to canonical form.
 250 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 251 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 252 * addresses to be in a canonical form:
 253 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 254 * canonical form [63:48] == [47]."
 255 */
 256#define GEN8_HIGH_ADDRESS_BIT 47
 257static inline uint64_t gen8_canonical_addr(uint64_t address)
 258{
 259	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
 
 
 
 
 
 
 
 
 
 
 260}
 261
 262static inline uint64_t gen8_noncanonical_addr(uint64_t address)
 263{
 264	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
 265}
 266
 267static inline uint64_t
 268relocation_target(struct drm_i915_gem_relocation_entry *reloc,
 269		  uint64_t target_offset)
 270{
 271	return gen8_canonical_addr((int)reloc->delta + target_offset);
 272}
 273
/*
 * Write a relocation value into @obj through an atomic CPU mapping.
 *
 * Moves the object to the CPU domain for writing, then stores the low 32 bits
 * of the relocation value at reloc->offset. On gen8+ the high 32 bits are
 * stored in the following dword, remapping if that dword starts a new page.
 *
 * Returns 0 on success or the error from the domain change.
 */
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = relocation_target(reloc, target_offset);
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		/* Wrapped to 0: the upper dword lives on the next page. */
		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);

	return 0;
}
 309
/*
 * Write a relocation value into @obj through the mappable GTT aperture.
 *
 * Moves the object to the GTT domain for writing and releases its fence, then
 * writes the low 32 bits with iowrite32 through an atomic write-combining
 * mapping. On gen8+ the high 32 bits go into the following dword, remapping
 * if that dword starts a new page.
 *
 * Returns 0 on success or the error from the domain change / fence release.
 */
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint64_t delta = relocation_target(reloc, target_offset);
	uint64_t offset;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform.  */
	offset = i915_gem_obj_ggtt_offset(obj);
	offset += reloc->offset;
	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
					      offset & PAGE_MASK);
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

	if (INTEL_INFO(dev)->gen >= 8) {
		offset += sizeof(uint32_t);

		/* offset is page aligned here, so it can be mapped as is. */
		if (offset_in_page(offset) == 0) {
			io_mapping_unmap_atomic(reloc_page);
			reloc_page =
				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
							 offset);
		}

		iowrite32(upper_32_bits(delta),
			  reloc_page + offset_in_page(offset));
	}

	io_mapping_unmap_atomic(reloc_page);

	return 0;
}
 355
 356static void
 357clflush_write32(void *addr, uint32_t value)
 
 358{
 359	/* This is not a fast path, so KISS. */
 360	drm_clflush_virt_range(addr, sizeof(uint32_t));
 361	*(uint32_t *)addr = value;
 362	drm_clflush_virt_range(addr, sizeof(uint32_t));
 
 
 
 
 
 
 
 
 
 363}
 364
/*
 * Write a relocation value into @obj through an atomic CPU mapping, with an
 * explicit cache flush around every store (see clflush_write32()).
 *
 * The object is moved to the GTT domain for writing first. On gen8+ the high
 * 32 bits are written to the following dword, remapping if that dword starts
 * a new page.
 *
 * Returns 0 on success or the error from the domain change.
 */
static int
relocate_entry_clflush(struct drm_i915_gem_object *obj,
		       struct drm_i915_gem_relocation_entry *reloc,
		       uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = relocation_target(reloc, target_offset);
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
				reloc->offset >> PAGE_SHIFT));
	clflush_write32(vaddr + page_offset, lower_32_bits(delta));

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		/* Wrapped to 0: the upper dword lives on the next page. */
		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
	}

	kunmap_atomic(vaddr);

	return 0;
}
 400
 401static int
 402i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 403				   struct eb_vmas *eb,
 404				   struct drm_i915_gem_relocation_entry *reloc)
 405{
 406	struct drm_device *dev = obj->base.dev;
 407	struct drm_gem_object *target_obj;
 408	struct drm_i915_gem_object *target_i915_obj;
 409	struct i915_vma *target_vma;
 410	uint64_t target_offset;
 411	int ret;
 412
 413	/* we've already hold a reference to all valid objects */
 414	target_vma = eb_get_vma(eb, reloc->target_handle);
 415	if (unlikely(target_vma == NULL))
 416		return -ENOENT;
 417	target_i915_obj = target_vma->obj;
 418	target_obj = &target_vma->obj->base;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 419
 420	target_offset = gen8_canonical_addr(target_vma->node.start);
 
 421
 422	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 423	 * pipe_control writes because the gpu doesn't properly redirect them
 424	 * through the ppgtt for non_secure batchbuffers. */
 425	if (unlikely(IS_GEN6(dev) &&
 426	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
 427		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
 428				    PIN_GLOBAL);
 429		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
 430			return ret;
 
 
 
 
 
 431	}
 432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 433	/* Validate that the target is in a valid r/w GPU domain */
 434	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
 435		DRM_DEBUG("reloc with multiple write domains: "
 436			  "obj %p target %d offset %d "
 437			  "read %08x write %08x",
 438			  obj, reloc->target_handle,
 439			  (int) reloc->offset,
 440			  reloc->read_domains,
 441			  reloc->write_domain);
 442		return -EINVAL;
 443	}
 444	if (unlikely((reloc->write_domain | reloc->read_domains)
 445		     & ~I915_GEM_GPU_DOMAINS)) {
 446		DRM_DEBUG("reloc with read/write non-GPU domains: "
 447			  "obj %p target %d offset %d "
 448			  "read %08x write %08x",
 449			  obj, reloc->target_handle,
 450			  (int) reloc->offset,
 451			  reloc->read_domains,
 452			  reloc->write_domain);
 453		return -EINVAL;
 454	}
 455
 456	target_obj->pending_read_domains |= reloc->read_domains;
 457	target_obj->pending_write_domain |= reloc->write_domain;
 458
 459	/* If the relocation already has the right value in it, no
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 460	 * more work needs to be done.
 461	 */
 462	if (target_offset == reloc->presumed_offset)
 
 463		return 0;
 464
 465	/* Check that the relocation address is valid... */
 466	if (unlikely(reloc->offset >
 467		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
 468		DRM_DEBUG("Relocation beyond object bounds: "
 469			  "obj %p target %d offset %d size %d.\n",
 470			  obj, reloc->target_handle,
 471			  (int) reloc->offset,
 472			  (int) obj->base.size);
 473		return -EINVAL;
 474	}
 475	if (unlikely(reloc->offset & 3)) {
 476		DRM_DEBUG("Relocation not 4-byte aligned: "
 477			  "obj %p target %d offset %d.\n",
 478			  obj, reloc->target_handle,
 479			  (int) reloc->offset);
 480		return -EINVAL;
 481	}
 482
 483	/* We can't wait for rendering with pagefaults disabled */
 484	if (obj->active && pagefault_disabled())
 485		return -EFAULT;
 486
 487	if (use_cpu_reloc(obj))
 488		ret = relocate_entry_cpu(obj, reloc, target_offset);
 489	else if (obj->map_and_fenceable)
 490		ret = relocate_entry_gtt(obj, reloc, target_offset);
 491	else if (cpu_has_clflush)
 492		ret = relocate_entry_clflush(obj, reloc, target_offset);
 493	else {
 494		WARN_ONCE(1, "Impossible case in relocation handling\n");
 495		ret = -ENODEV;
 496	}
 497
 498	if (ret)
 499		return ret;
 500
 501	/* and update the user's relocation entry */
 502	reloc->presumed_offset = target_offset;
 503
 504	return 0;
 505}
 506
/*
 * Fast-path relocation of one vma.
 *
 * The user's relocation array is processed in chunks that fit a 512-byte
 * on-stack buffer. Both the read and the write-back use the *_inatomic user
 * copy helpers; a failed copy is reported as -EFAULT.
 *
 * For each entry whose presumed_offset was changed by the relocation, the new
 * value is written back to the user's entry.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, or the error from
 * i915_gem_execbuffer_relocate_entry().
 */
static int
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	int remain, ret;

	user_relocs = to_user_ptr(entry->relocs_ptr);

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		/* clamp this chunk to the capacity of the stack buffer */
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
			return -EFAULT;

		do {
			/* remember the user's value to detect a change below */
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}
 552
 553static int
 554i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
 555				      struct eb_vmas *eb,
 556				      struct drm_i915_gem_relocation_entry *relocs)
 557{
 558	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 559	int i, ret;
 
 
 
 560
 561	for (i = 0; i < entry->relocation_count; i++) {
 562		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
 563		if (ret)
 564			return ret;
 565	}
 566
 567	return 0;
 568}
 569
 570static int
 571i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 572{
 573	struct i915_vma *vma;
 574	int ret = 0;
 575
 576	/* This is the fast path and we cannot handle a pagefault whilst
 577	 * holding the struct mutex lest the user pass in the relocations
 578	 * contained within a mmaped bo. For in such a case we, the page
 579	 * fault handler would call i915_gem_fault() and we would try to
 580	 * acquire the struct mutex again. Obviously this is bad and so
 581	 * lockdep complains vehemently.
 582	 */
 583	pagefault_disable();
 584	list_for_each_entry(vma, &eb->vmas, exec_list) {
 585		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
 586		if (ret)
 587			break;
 588	}
 589	pagefault_enable();
 590
 591	return ret;
 
 592}
 593
 594static bool only_mappable_for_reloc(unsigned int flags)
 595{
 596	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
 597		__EXEC_OBJECT_NEEDS_MAP;
 598}
 599
/*
 * Pin one vma into its address space according to its exec entry flags.
 *
 * Placement constraints are only added for vmas that are not bound yet. If
 * pinning fails with -ENOSPC or -E2BIG and the object only wanted to be
 * mappable for relocations, the pin is retried without PIN_MAPPABLE.
 *
 * On success __EXEC_OBJECT_HAS_PIN is set, a fence is acquired and pinned
 * when EXEC_OBJECT_NEEDS_FENCE is set, entry->offset is updated to the
 * vma's address (setting *need_reloc if it changed), and EXEC_OBJECT_WRITE
 * objects get render pending read/write domains.
 *
 * @ring is not referenced in this function.
 *
 * Returns 0 on success or the error from pinning / fence acquisition.
 */
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_engine_cs *ring,
				bool *need_reloc)
{
	struct drm_i915_gem_object *obj = vma->obj;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	uint64_t flags;
	int ret;

	flags = PIN_USER;
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;

	if (!drm_mm_node_allocated(&vma->node)) {
		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
		 * limit address to the first 4GBs for unflagged objects.
		 */
		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
			flags |= PIN_ZONE_4G;
		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
			flags |= PIN_GLOBAL | PIN_MAPPABLE;
		/* the bias / fixed offset value is carried in the flags word */
		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
		if (entry->flags & EXEC_OBJECT_PINNED)
			flags |= entry->offset | PIN_OFFSET_FIXED;
		if ((flags & PIN_MAPPABLE) == 0)
			flags |= PIN_HIGH;
	}

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
	/* mappable was only a preference for relocations: retry without it */
	if ((ret == -ENOSPC  || ret == -E2BIG) &&
	    only_mappable_for_reloc(entry->flags))
		ret = i915_gem_object_pin(obj, vma->vm,
					  entry->alignment,
					  flags & ~PIN_MAPPABLE);
	if (ret)
		return ret;

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_gem_object_get_fence(obj);
		if (ret)
			return ret;

		if (i915_gem_object_pin_fence(obj))
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
	}

	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	return 0;
}
 662
 663static bool
 664need_reloc_mappable(struct i915_vma *vma)
 665{
 666	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 
 
 667
 668	if (entry->relocation_count == 0)
 669		return false;
 
 
 
 
 670
 671	if (!vma->is_ggtt)
 672		return false;
 673
 674	/* See also use_cpu_reloc() */
 675	if (HAS_LLC(vma->obj->base.dev))
 676		return false;
 677
 678	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 679		return false;
 680
 681	return true;
 682}
 683
/*
 * Check whether the vma's current placement violates the constraints in its
 * exec entry. Returns true if the caller should unbind it.
 *
 * The checks are, in order: alignment, fixed offset (EXEC_OBJECT_PINNED),
 * minimum bias, mappability, and the 4GiB limit for objects that do not
 * declare EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
 */
static bool
eb_vma_misplaced(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct drm_i915_gem_object *obj = vma->obj;

	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);

	if (entry->alignment &&
	    vma->node.start & (entry->alignment - 1))
		return true;

	if (entry->flags & EXEC_OBJECT_PINNED &&
	    vma->node.start != entry->offset)
		return true;

	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

	/* avoid costly ping-pong once a batch bo ended up non-mappable */
	/* note: this returns either way, so the 4GiB check below is skipped */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
		return !only_mappable_for_reloc(entry->flags);

	/* any bit above bit 31 in the last byte's address means it ends above 4GiB */
	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
	    (vma->node.start + vma->node.size - 1) >> 32)
		return true;

	return false;
}
 714
/*
 * Reserve address space for every vma on @vmas.
 *
 * First the list is reordered: EXEC_OBJECT_PINNED vmas are put at the front,
 * then vmas that need a mappable binding, then the rest. Each object's
 * pending domains are reset along the way. The vmas are then pinned; if that
 * fails with -ENOSPC, everything is unreserved, the vm is evicted and the
 * pinning is attempted once more.
 *
 * *need_relocs is set by i915_gem_execbuffer_reserve_vma() when an object's
 * offset changed.
 *
 * Returns 0 on success or the first error from unbinding, pinning or
 * eviction.
 */
static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
			    struct list_head *vmas,
			    struct intel_context *ctx,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct i915_address_space *vm;
	struct list_head ordered_vmas;
	struct list_head pinned_vmas;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

	i915_gem_retire_requests_ring(ring);

	/* the vm is taken from the first vma on the list */
	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

	INIT_LIST_HEAD(&ordered_vmas);
	INIT_LIST_HEAD(&pinned_vmas);
	while (!list_empty(vmas)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;

		if (ctx->flags & CONTEXT_NO_ZEROMAP)
			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

		/* the fence request is dropped on gen4+ */
		if (!has_fenced_gpu_access)
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
		need_fence =
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(vma);

		/* mappable vmas go to the head of ordered_vmas, others to the tail */
		if (entry->flags & EXEC_OBJECT_PINNED)
			list_move_tail(&vma->exec_list, &pinned_vmas);
		else if (need_mappable) {
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
			list_move(&vma->exec_list, &ordered_vmas);
		} else
			list_move_tail(&vma->exec_list, &ordered_vmas);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
	}
	/* list_splice() prepends, so the pinned vmas end up first */
	list_splice(&ordered_vmas, vmas);
	list_splice(&pinned_vmas, vmas);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
				continue;

			if (eb_vma_misplaced(vma))
				ret = i915_vma_unbind(vma);
			else
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
				continue;

			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
			if (ret)
				goto err;
		}

err:
		/* success and hard errors return here; -ENOSPC gets one retry */
		if (ret != -ENOSPC || retry++)
			return ret;

		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

		ret = i915_gem_evict_vm(vm, true);
		if (ret)
			return ret;
	} while (1);
}
 819
/*
 * Slow-path relocation, used when relocations cannot be processed with
 * pagefaults disabled.
 *
 * Releases every vma and object reference held in @eb, drops
 * dev->struct_mutex, copies all relocation entries from userspace into one
 * kernel array (marking the user's presumed offsets invalid), then retakes
 * the mutex, looks the vmas up again, reserves them and applies the copied
 * relocations.
 *
 * dev->struct_mutex is unlocked and relocked in this function; every return
 * path retakes it first.
 *
 * Returns 0 on success, -ENOMEM if the temporary arrays cannot be allocated,
 * -EFAULT on a failed user copy, or the error from locking, lookup, reserve
 * or relocation.
 */
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_engine_cs *ring,
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec,
				  struct intel_context *ctx)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	unsigned count = args->buffer_count;

	/* remember the vm before the list is emptied below */
	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		i915_gem_execbuffer_unreserve_vma(vma);
		drm_gem_object_unreference(&vma->obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	/* reloc_offset[i] is the index in reloc[] of object i's first entry */
	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
	reloc = drm_malloc_ab(total, sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = to_user_ptr(exec[i].relocs_ptr);

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		/* the mutex was not taken above; take it before returning */
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(vma, &eb->vmas, exec_list) {
		/* index of this vma's exec entry within exec[] */
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as they are; this is the painfully slow
	 * path, and we want to avoid the complication of dropping the lock
	 * whilst having buffers reserved in the aperture and so causing
	 * spurious ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
	drm_free_large(reloc_offset);
	return ret;
}
 936
 937static int
 938i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 939				struct list_head *vmas)
 940{
 941	const unsigned other_rings = ~intel_ring_flag(req->ring);
 942	struct i915_vma *vma;
 943	uint32_t flush_domains = 0;
 944	bool flush_chipset = false;
 945	int ret;
 946
 947	list_for_each_entry(vma, vmas, exec_list) {
 948		struct drm_i915_gem_object *obj = vma->obj;
 949
 950		if (obj->active & other_rings) {
 951			ret = i915_gem_object_sync(obj, req->ring, &req);
 952			if (ret)
 953				return ret;
 954		}
 955
 956		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 957			flush_chipset |= i915_gem_clflush_object(obj, false);
 958
 959		flush_domains |= obj->base.write_domain;
 960	}
 961
 962	if (flush_chipset)
 963		i915_gem_chipset_flush(req->ring->dev);
 964
 965	if (flush_domains & I915_GEM_DOMAIN_GTT)
 966		wmb();
 967
 968	/* Unconditionally invalidate gpu caches and ensure that we do flush
 969	 * any residual writes from the previous batch.
 970	 */
 971	return intel_ring_invalidate_all_caches(req);
 972}
 973
 974static bool
 975i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 976{
 977	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
 978		return false;
 979
 980	/* Kernel clipping was a DRI1 misfeature */
 981	if (exec->num_cliprects || exec->cliprects_ptr)
 982		return false;
 983
 984	if (exec->DR4 == 0xffffffff) {
 985		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
 986		exec->DR4 = 0;
 
 987	}
 988	if (exec->DR1 || exec->DR4)
 989		return false;
 990
 991	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
 992		return false;
 993
 994	return true;
 
 995}
 996
 997static int
 998validate_exec_list(struct drm_device *dev,
 999		   struct drm_i915_gem_exec_object2 *exec,
1000		   int count)
1001{
1002	unsigned relocs_total = 0;
1003	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1004	unsigned invalid_flags;
1005	int i;
 
 
 
 
 
 
 
 
 
1006
1007	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
1008	if (USES_FULL_PPGTT(dev))
1009		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 
1010
1011	for (i = 0; i < count; i++) {
1012		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
1013		int length; /* limited by fault_in_pages_readable() */
 
1014
1015		if (exec[i].flags & invalid_flags)
1016			return -EINVAL;
 
 
 
 
 
 
 
 
 
1017
1018		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
1019		 * any non-page-aligned or non-canonical addresses.
 
 
 
 
 
 
 
 
 
1020		 */
1021		if (exec[i].flags & EXEC_OBJECT_PINNED) {
1022			if (exec[i].offset !=
1023			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
1024				return -EINVAL;
1025
1026			/* From drm_mm perspective address space is continuous,
1027			 * so from this point we're always using non-canonical
1028			 * form internally.
1029			 */
1030			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
1031		}
1032
1033		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
1034			return -EINVAL;
 
 
 
 
 
 
1035
1036		/* First check for malicious input causing overflow in
1037		 * the worst case where we need to allocate the entire
1038		 * relocation tree as a single array.
1039		 */
1040		if (exec[i].relocation_count > relocs_max - relocs_total)
1041			return -EINVAL;
1042		relocs_total += exec[i].relocation_count;
1043
1044		length = exec[i].relocation_count *
1045			sizeof(struct drm_i915_gem_relocation_entry);
1046		/*
1047		 * We must check that the entire relocation array is safe
1048		 * to read, but since we may need to update the presumed
1049		 * offsets during execution, check for full write access.
1050		 */
1051		if (!access_ok(VERIFY_WRITE, ptr, length))
1052			return -EFAULT;
1053
1054		if (likely(!i915.prefault_disable)) {
1055			if (fault_in_multipages_readable(ptr, length))
1056				return -EFAULT;
1057		}
 
1058	}
 
 
 
 
1059
1060	return 0;
1061}
1062
1063static struct intel_context *
1064i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1065			  struct intel_engine_cs *ring, const u32 ctx_id)
1066{
1067	struct intel_context *ctx = NULL;
1068	struct i915_ctx_hang_stats *hs;
1069
1070	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
1071		return ERR_PTR(-EINVAL);
1072
1073	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
1074	if (IS_ERR(ctx))
1075		return ctx;
1076
1077	hs = &ctx->hang_stats;
1078	if (hs->banned) {
1079		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1080		return ERR_PTR(-EIO);
1081	}
1082
1083	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
1084		int ret = intel_lr_context_deferred_alloc(ctx, ring);
1085		if (ret) {
1086			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
1087			return ERR_PTR(ret);
1088		}
1089	}
 
 
 
 
 
1090
1091	return ctx;
1092}
1093
/*
 * Commit the pending GEM domains of every vma on @vmas and associate each
 * object with @req.
 *
 * For each vma this marks the object dirty, replaces the object's read and
 * write domains with the pending ones, calls i915_vma_move_to_active(),
 * records @req as the object's last write request when a write domain is
 * set, and records it as the last fenced request for entries flagged
 * EXEC_OBJECT_NEEDS_FENCE. The previous domains are handed to the
 * i915_gem_object_change_domain tracepoint.
 */
1094void
1095i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1096				   struct drm_i915_gem_request *req)
1097{
1098	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
1099	struct i915_vma *vma;
1100
1101	list_for_each_entry(vma, vmas, exec_list) {
1102		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
1103		struct drm_i915_gem_object *obj = vma->obj;
		/* Snapshot the old domains for the tracepoint at the end of the loop. */
1104		u32 old_read = obj->base.read_domains;
1105		u32 old_write = obj->base.write_domain;
1106
1107		obj->dirty = 1; /* be paranoid  */
1108		obj->base.write_domain = obj->base.pending_write_domain;
		/* No write pending: keep the existing read domains alongside the pending ones. */
1109		if (obj->base.write_domain == 0)
1110			obj->base.pending_read_domains |= obj->base.read_domains;
1111		obj->base.read_domains = obj->base.pending_read_domains;
1112
1113		i915_vma_move_to_active(vma, req);
1114		if (obj->base.write_domain) {
1115			i915_gem_request_assign(&obj->last_write_req, req);
1116
1117			intel_fb_obj_invalidate(obj, ORIGIN_CS);
1118
1119			/* update for the implicit flush after a batch */
1120			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1121		}
1122		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
1123			i915_gem_request_assign(&obj->last_fenced_req, req);
			/*
			 * Only when a fence register was actually taken: move it to
			 * the tail of mm.fence_list (presumably the most recently
			 * used end of an LRU - confirm against the fence code).
			 */
1124			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1125				struct drm_i915_private *dev_priv = to_i915(ring->dev);
1126				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1127					       &dev_priv->mm.fence_list);
1128			}
1129		}
1130
1131		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1132	}
1133}
1134
1135void
1136i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
1137{
1138	/* Unconditionally force add_request to emit a full flush. */
1139	params->ring->gpu_caches_dirty = true;
1140
1141	/* Add a breadcrumb for the completion of the batch buffer */
1142	__i915_add_request(params->request, params->batch_obj, true);
 
 
 
 
1143}
1144
1145static int
1146i915_reset_gen7_sol_offsets(struct drm_device *dev,
1147			    struct drm_i915_gem_request *req)
1148{
1149	struct intel_engine_cs *ring = req->ring;
1150	struct drm_i915_private *dev_priv = dev->dev_private;
1151	int ret, i;
1152
1153	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1154		DRM_DEBUG("sol reset is gen7/rcs only\n");
1155		return -EINVAL;
1156	}
1157
1158	ret = intel_ring_begin(req, 4 * 3);
1159	if (ret)
1160		return ret;
1161
 
1162	for (i = 0; i < 4; i++) {
1163		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1164		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1165		intel_ring_emit(ring, 0);
1166	}
1167
1168	intel_ring_advance(ring);
1169
1170	return 0;
1171}
1172
/*
 * Run the command parser for @batch_obj using a shadow batch object taken
 * from ring->batch_pool (sized to PAGE_ALIGN(@batch_len)).
 *
 * On success the shadow object is pinned into the GGTT, @shadow_exec_entry is
 * zeroed and flagged __EXEC_OBJECT_HAS_PIN, the shadow's GGTT vma is appended
 * to eb->vmas (with an extra object reference taken), and the shadow object
 * is returned.
 *
 * On failure:
 *  - an ERR_PTR from i915_gem_batch_pool_get() is returned as is;
 *  - -EACCES from a later step returns the original @batch_obj, so the caller
 *    can tell "not parsed" from "parsed" by comparing pointers;
 *  - any other error is returned as ERR_PTR(ret).
 */
1173static struct drm_i915_gem_object*
1174i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
1175			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1176			  struct eb_vmas *eb,
1177			  struct drm_i915_gem_object *batch_obj,
1178			  u32 batch_start_offset,
1179			  u32 batch_len,
1180			  bool is_master)
1181{
1182	struct drm_i915_gem_object *shadow_batch_obj;
1183	struct i915_vma *vma;
1184	int ret;
1185
1186	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
1187						   PAGE_ALIGN(batch_len));
1188	if (IS_ERR(shadow_batch_obj))
1189		return shadow_batch_obj;
1190
1191	ret = i915_parse_cmds(ring,
1192			      batch_obj,
1193			      shadow_batch_obj,
1194			      batch_start_offset,
1195			      batch_len,
1196			      is_master);
1197	if (ret)
1198		goto err;
1199
1200	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
1201	if (ret)
1202		goto err;
1203
	/*
	 * NOTE(review): presumably balances a page pin taken when the object
	 * came out of the batch pool - the err path does the same; confirm.
	 */
1204	i915_gem_object_unpin_pages(shadow_batch_obj);
1205
1206	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1207
	/* Hook the shadow batch into the execbuffer's vma list as an already pinned entry. */
1208	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1209	vma->exec_entry = shadow_exec_entry;
1210	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1211	drm_gem_object_reference(&shadow_batch_obj->base);
1212	list_add_tail(&vma->exec_list, &eb->vmas);
 
 
 
 
1213
1214	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
 
 
1215
1216	return shadow_batch_obj;
 
 
 
 
1217
1218err:
1219	i915_gem_object_unpin_pages(shadow_batch_obj);
1220	if (ret == -EACCES) /* unhandled chained batch */
1221		return batch_obj;
1222	else
1223		return ERR_PTR(ret);
1224}
1225
/*
 * Emit one execbuffer onto the ring described by @params.
 *
 * Steps, in order:
 *  1. i915_gem_execbuffer_move_to_gpu() on @vmas;
 *  2. i915_switch_context() for the request;
 *  3. validate the I915_EXEC_CONSTANTS_* mode taken from args->flags and,
 *     on RCS, program INSTPM through MI_LOAD_REGISTER_IMM when the mode
 *     differs from the cached dev_priv->relative_constants_mode;
 *  4. reset the gen7 SOL offsets if I915_EXEC_GEN7_SOL_RESET is set;
 *  5. dispatch the batch through ring->dispatch_execbuffer();
 *  6. move the vmas to the active state and retire the commands.
 *
 * Returns 0 on success, -EINVAL for an unsupported constants mode, or the
 * error of the first failing step.
 */
1226int
1227i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1228			       struct drm_i915_gem_execbuffer2 *args,
1229			       struct list_head *vmas)
1230{
1231	struct drm_device *dev = params->dev;
1232	struct intel_engine_cs *ring = params->ring;
1233	struct drm_i915_private *dev_priv = dev->dev_private;
1234	u64 exec_start, exec_len;
1235	int instp_mode;
1236	u32 instp_mask;
1237	int ret;
1238
1239	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1240	if (ret)
1241		return ret;
1242
1243	ret = i915_switch_context(params->request);
1244	if (ret)
1245		return ret;
1246
	/* After the context switch this ring's bit in pd_dirty_rings is expected to be clear. */
1247	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
1248	     "%s didn't clear reload\n", ring->name);
1249
1250	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1251	instp_mask = I915_EXEC_CONSTANTS_MASK;
1252	switch (instp_mode) {
1253	case I915_EXEC_CONSTANTS_REL_GENERAL:
1254	case I915_EXEC_CONSTANTS_ABSOLUTE:
1255	case I915_EXEC_CONSTANTS_REL_SURFACE:
1256		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1257			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1258			return -EINVAL;
1259		}
1260
		/* The generation checks only apply when the mode actually changes. */
1261		if (instp_mode != dev_priv->relative_constants_mode) {
1262			if (INTEL_INFO(dev)->gen < 4) {
1263				DRM_DEBUG("no rel constants on pre-gen4\n");
1264				return -EINVAL;
1265			}
1266
			/* Note: the check is gen > 5 (gen6 and later), although the message says "gen5+". */
1267			if (INTEL_INFO(dev)->gen > 5 &&
1268			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1269				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1270				return -EINVAL;
1271			}
1272
1273			/* The HW changed the meaning on this bit on gen6 */
1274			if (INTEL_INFO(dev)->gen >= 6)
1275				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1276		}
1277		break;
1278	default:
1279		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1280		return -EINVAL;
1281	}
1282
1283	if (ring == &dev_priv->ring[RCS] &&
1284	    instp_mode != dev_priv->relative_constants_mode) {
1285		ret = intel_ring_begin(params->request, 4);
1286		if (ret)
1287			return ret;
1288
		/* Four dwords: NOOP, LRI header, INSTPM register, then mask (high 16 bits) | mode. */
1289		intel_ring_emit(ring, MI_NOOP);
1290		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1291		intel_ring_emit_reg(ring, INSTPM);
1292		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1293		intel_ring_advance(ring);
1294
		/* Cache the mode so later submissions with the same mode skip the register write. */
1295		dev_priv->relative_constants_mode = instp_mode;
1296	}
 
1297
1298	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1299		ret = i915_reset_gen7_sol_offsets(dev, params->request);
1300		if (ret)
1301			return ret;
1302	}
1303
1304	exec_len   = args->batch_len;
1305	exec_start = params->batch_obj_vm_offset +
1306		     params->args_batch_start_offset;
1307
	/* A zero batch_len means the whole batch object is used. */
1308	if (exec_len == 0)
1309		exec_len = params->batch_obj->base.size;
1310
1311	ret = ring->dispatch_execbuffer(params->request,
1312					exec_start, exec_len,
1313					params->dispatch_flags);
1314	if (ret)
1315		return ret;
1316
1317	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1318
1319	i915_gem_execbuffer_move_to_active(vmas, params->request);
1320	i915_gem_execbuffer_retire_commands(params);
1321
1322	return 0;
1323}
1324
1325/**
1326 * Find one BSD ring to dispatch the corresponding BSD command.
1327 * The ring index is returned.
1328 */
1329static unsigned int
1330gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file)
 
1331{
1332	struct drm_i915_file_private *file_priv = file->driver_priv;
1333
1334	/* Check whether the file_priv has already selected one ring. */
1335	if ((int)file_priv->bsd_ring < 0) {
1336		/* If not, use the ping-pong mechanism to select one. */
1337		mutex_lock(&dev_priv->dev->struct_mutex);
1338		file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index;
1339		dev_priv->mm.bsd_ring_dispatch_index ^= 1;
1340		mutex_unlock(&dev_priv->dev->struct_mutex);
1341	}
1342
1343	return file_priv->bsd_ring;
1344}
1345
1346static struct drm_i915_gem_object *
1347eb_get_batch(struct eb_vmas *eb)
1348{
1349	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1350
1351	/*
1352	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1353	 * to negative relocation deltas. Usually that works out ok since the
1354	 * relocate address is still positive, except when the batch is placed
1355	 * very low in the GTT. Ensure this doesn't happen.
1356	 *
1357	 * Note that actual hangs have only been observed on gen7, but for
1358	 * paranoia do it everywhere.
1359	 */
1360	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
1361		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1362
1363	return vma->obj;
1364}
1365
/*
 * Largest ring selector value accepted from userspace; eb_select_ring()
 * rejects (args->flags & I915_EXEC_RING_MASK) values above this.
 */
1366#define I915_USER_RINGS (4)
1367
/*
 * Maps the I915_EXEC_* ring selector from the execbuffer flags to the
 * driver's ring id. Both I915_EXEC_DEFAULT and I915_EXEC_RENDER select RCS.
 */
1368static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = {
1369	[I915_EXEC_DEFAULT]	= RCS,
1370	[I915_EXEC_RENDER]	= RCS,
1371	[I915_EXEC_BLT]		= BCS,
1372	[I915_EXEC_BSD]		= VCS,
1373	[I915_EXEC_VEBOX]	= VECS
1374};
1375
1376static int
1377eb_select_ring(struct drm_i915_private *dev_priv,
1378	       struct drm_file *file,
1379	       struct drm_i915_gem_execbuffer2 *args,
1380	       struct intel_engine_cs **ring)
1381{
1382	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
 
1383
1384	if (user_ring_id > I915_USER_RINGS) {
1385		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1386		return -EINVAL;
1387	}
1388
1389	if ((user_ring_id != I915_EXEC_BSD) &&
1390	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1391		DRM_DEBUG("execbuf with non bsd ring but with invalid "
1392			  "bsd dispatch flags: %d\n", (int)(args->flags));
1393		return -EINVAL;
1394	}
1395
1396	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
1397		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
1398
1399		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1400			bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file);
1401		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
1402			   bsd_idx <= I915_EXEC_BSD_RING2) {
1403			bsd_idx >>= I915_EXEC_BSD_SHIFT;
1404			bsd_idx--;
1405		} else {
1406			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
1407				  bsd_idx);
1408			return -EINVAL;
1409		}
1410
1411		*ring = &dev_priv->ring[_VCS(bsd_idx)];
1412	} else {
1413		*ring = &dev_priv->ring[user_ring_map[user_ring_id]];
1414	}
1415
1416	if (!intel_ring_initialized(*ring)) {
1417		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1418		return -EINVAL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1419	}
1420
1421	return 0;
1422}
1423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Common implementation behind both execbuffer ioctls.
 *
 * @args is the (possibly synthesised) execbuffer2 request and @exec the
 * kernel copy of its object list. The function, in order:
 *  - validates @args and @exec, derives the dispatch flags and selects the
 *    ring, all before taking any lock;
 *  - takes a runtime-pm reference and struct_mutex;
 *  - validates and references the context, picks the address space (the
 *    context's ppgtt if it has one, otherwise the global gtt);
 *  - looks up, reserves and relocates the objects (falling back to
 *    i915_gem_execbuffer_relocate_slow() on -EFAULT);
 *  - optionally runs the command parser, which may substitute the batch;
 *  - pins secure batches into the global gtt;
 *  - allocates the request and hands everything to gt.execbuf_submit().
 *
 * The success path falls through the cleanup labels as well:
 *  err_batch_unpin  undoes the ggtt pin taken for I915_DISPATCH_SECURE;
 *  err              drops the context reference, destroys @eb, cancels the
 *                   request if one exists and ret is non-zero, and unlocks
 *                   struct_mutex;
 *  pre_mutex_err    releases the runtime-pm reference.
 *
 * Returns 0 on success or a negative errno.
 */
1424static int
1425i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1426		       struct drm_file *file,
1427		       struct drm_i915_gem_execbuffer2 *args,
1428		       struct drm_i915_gem_exec_object2 *exec)
 
1429{
1430	struct drm_i915_private *dev_priv = dev->dev_private;
1431	struct drm_i915_gem_request *req = NULL;
1432	struct eb_vmas *eb;
1433	struct drm_i915_gem_object *batch_obj;
1434	struct drm_i915_gem_exec_object2 shadow_exec_entry;
1435	struct intel_engine_cs *ring;
1436	struct intel_context *ctx;
1437	struct i915_address_space *vm;
1438	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1439	struct i915_execbuffer_params *params = &params_master;
1440	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1441	u32 dispatch_flags;
1442	int ret;
1443	bool need_relocs;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1444
1445	if (!i915_gem_check_execbuffer(args))
1446		return -EINVAL;
1447
1448	ret = validate_exec_list(dev, exec, args->buffer_count);
1449	if (ret)
1450		return ret;
1451
1452	dispatch_flags = 0;
	/* A secure dispatch is only granted to a DRM master that also has CAP_SYS_ADMIN. */
1453	if (args->flags & I915_EXEC_SECURE) {
1454		if (!file->is_master || !capable(CAP_SYS_ADMIN))
1455		    return -EPERM;
1456
1457		dispatch_flags |= I915_DISPATCH_SECURE;
1458	}
1459	if (args->flags & I915_EXEC_IS_PINNED)
1460		dispatch_flags |= I915_DISPATCH_PINNED;
1461
1462	ret = eb_select_ring(dev_priv, file, args, &ring);
1463	if (ret)
1464		return ret;
1465
1466	if (args->buffer_count < 1) {
1467		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1468		return -EINVAL;
1469	}
1470
1471	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1472		if (!HAS_RESOURCE_STREAMER(dev)) {
1473			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1474			return -EINVAL;
1475		}
1476		if (ring->id != RCS) {
1477			DRM_DEBUG("RS is not available on %s\n",
1478				 ring->name);
1479			return -EINVAL;
1480		}
1481
1482		dispatch_flags |= I915_DISPATCH_RS;
1483	}
1484
	/* From here on every exit must go through pre_mutex_err to drop this reference. */
1485	intel_runtime_pm_get(dev_priv);
1486
1487	ret = i915_mutex_lock_interruptible(dev);
1488	if (ret)
1489		goto pre_mutex_err;
1490
1491	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1492	if (IS_ERR(ctx)) {
1493		mutex_unlock(&dev->struct_mutex);
1494		ret = PTR_ERR(ctx);
1495		goto pre_mutex_err;
1496	}
1497
1498	i915_gem_context_reference(ctx);
 
 
 
 
 
 
1499
1500	if (ctx->ppgtt)
1501		vm = &ctx->ppgtt->base;
1502	else
1503		vm = &dev_priv->gtt.base;
1504
1505	memset(&params_master, 0x00, sizeof(params_master));
1506
1507	eb = eb_create(args);
	/* eb does not exist yet, so unwind by hand instead of jumping to err. */
1508	if (eb == NULL) {
1509		i915_gem_context_unreference(ctx);
1510		mutex_unlock(&dev->struct_mutex);
1511		ret = -ENOMEM;
1512		goto pre_mutex_err;
1513	}
1514
1515	/* Look up object handles */
1516	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1517	if (ret)
1518		goto err;
1519
1520	/* take note of the batch buffer before we might reorder the lists */
1521	batch_obj = eb_get_batch(eb);
 
1522
1523	/* Move the objects en-masse into the GTT, evicting if necessary. */
1524	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1525	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
1526	if (ret)
1527		goto err;
1528
1529	/* The objects are in their final locations, apply the relocations. */
1530	if (need_relocs)
1531		ret = i915_gem_execbuffer_relocate(eb);
	/*
	 * Only -EFAULT from the fast path is retried through the slow path,
	 * which must come back with struct_mutex held (hence the BUG_ON).
	 */
1532	if (ret) {
1533		if (ret == -EFAULT) {
1534			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1535								eb, exec, ctx);
1536			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1537		}
1538		if (ret)
1539			goto err;
1540	}
1541
1542	/* Set the pending read domains for the batch buffer to COMMAND */
1543	if (batch_obj->base.pending_write_domain) {
1544		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1545		ret = -EINVAL;
1546		goto err;
 
 
 
 
 
 
1547	}
1548
1549	params->args_batch_start_offset = args->batch_start_offset;
	/* The command parser is skipped when userspace passed a zero batch_len. */
1550	if (i915_needs_cmd_parser(ring) && args->batch_len) {
1551		struct drm_i915_gem_object *parsed_batch_obj;
1552
1553		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
1554						      &shadow_exec_entry,
1555						      eb,
1556						      batch_obj,
1557						      args->batch_start_offset,
1558						      args->batch_len,
1559						      file->is_master);
1560		if (IS_ERR(parsed_batch_obj)) {
1561			ret = PTR_ERR(parsed_batch_obj);
1562			goto err;
1563		}
1564
1565		/*
1566		 * parsed_batch_obj == batch_obj means batch not fully parsed:
1567		 * Accept, but don't promote to secure.
1568		 */
 
1569
1570		if (parsed_batch_obj != batch_obj) {
1571			/*
1572			 * Batch parsed and accepted:
1573			 *
1574			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1575			 * bit from MI_BATCH_BUFFER_START commands issued in
1576			 * the dispatch_execbuffer implementations. We
1577			 * specifically don't want that set on batches the
1578			 * command parser has accepted.
1579			 */
1580			dispatch_flags |= I915_DISPATCH_SECURE;
1581			params->args_batch_start_offset = 0;
1582			batch_obj = parsed_batch_obj;
1583		}
1584	}
1585
1586	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
 
1587
1588	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 
1589	 * batch" bit. Hence we need to pin secure batches into the global gtt.
1590	 * hsw should have this fixed, but bdw mucks it up again. */
1591	if (dispatch_flags & I915_DISPATCH_SECURE) {
 
 
1592		/*
1593		 * So on first glance it looks freaky that we pin the batch here
1594		 * outside of the reservation loop. But:
1595		 * - The batch is already pinned into the relevant ppgtt, so we
1596		 *   already have the backing storage fully allocated.
1597		 * - No other BO uses the global gtt (well contexts, but meh),
1598		 *   so we don't really have issues with multiple objects not
1599		 *   fitting due to fragmentation.
1600		 * So this is actually safe.
1601		 */
1602		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1603		if (ret)
1604			goto err;
 
 
1605
1606		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
1607	} else
1608		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
 
 
1609
1610	/* Allocate a request for this batch buffer nice and early. */
1611	req = i915_gem_request_alloc(ring, ctx);
	/* req holds an ERR_PTR here; the cleanup below skips it via IS_ERR_OR_NULL(). */
1612	if (IS_ERR(req)) {
1613		ret = PTR_ERR(req);
1614		goto err_batch_unpin;
1615	}
1616
1617	ret = i915_gem_request_add_to_client(req, file);
1618	if (ret)
1619		goto err_batch_unpin;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1620
1621	/*
1622	 * Save assorted stuff away to pass through to *_submission().
1623	 * NB: This data should be 'persistent' and not local as it will
1624	 * kept around beyond the duration of the IOCTL once the GPU
1625	 * scheduler arrives.
 
1626	 */
1627	params->dev                     = dev;
1628	params->file                    = file;
1629	params->ring                    = ring;
1630	params->dispatch_flags          = dispatch_flags;
1631	params->batch_obj               = batch_obj;
1632	params->ctx                     = ctx;
1633	params->request                 = req;
1634
1635	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1636
1637err_batch_unpin:
1638	/*
1639	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1640	 * batch vma for correctness. For less ugly and less fragility this
1641	 * needs to be adjusted to also track the ggtt batch vma properly as
1642	 * active.
1643	 */
1644	if (dispatch_flags & I915_DISPATCH_SECURE)
1645		i915_gem_object_ggtt_unpin(batch_obj);
 
 
 
 
 
 
 
 
 
 
1646
1647err:
1648	/* the request owns the ref now */
1649	i915_gem_context_unreference(ctx);
1650	eb_destroy(eb);
 
 
 
 
 
 
1651
1652	/*
1653	 * If the request was created but not successfully submitted then it
1654	 * must be freed again. If it was submitted then it is being tracked
1655	 * on the active request list and no clean up is required here.
1656	 */
1657	if (ret && !IS_ERR_OR_NULL(req))
1658		i915_gem_request_cancel(req);
1659
1660	mutex_unlock(&dev->struct_mutex);
1661
1662pre_mutex_err:
1663	/* intel_gpu_busy should also get a ref, so it will free when the device
1664	 * is really idle. */
1665	intel_runtime_pm_put(dev_priv);
1666	return ret;
1667}
1668
1669/*
1670 * Legacy execbuffer just creates an exec2 list from the original exec object
1671 * list array and passes it to the real function.
1672 */
/*
 * @data is the legacy struct drm_i915_gem_execbuffer. The object list is
 * copied in from userspace and each entry is converted to an exec_object2:
 * handle, relocation_count, relocs_ptr, alignment and offset are copied, and
 * flags is EXEC_OBJECT_NEEDS_FENCE on gen < 4 and 0 otherwise. The request
 * itself is rebuilt as an execbuffer2 with I915_EXEC_RENDER and context id 0.
 *
 * On success the resulting offsets, converted to canonical form, are written
 * back to the user's list.
 *
 * Returns -EINVAL for an empty list, -ENOMEM if either list cannot be
 * allocated, -EFAULT if a user copy fails, otherwise the result of
 * i915_gem_do_execbuffer().
 */
1673int
1674i915_gem_execbuffer(struct drm_device *dev, void *data,
1675		    struct drm_file *file)
1676{
1677	struct drm_i915_gem_execbuffer *args = data;
1678	struct drm_i915_gem_execbuffer2 exec2;
1679	struct drm_i915_gem_exec_object *exec_list = NULL;
1680	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1681	int ret, i;
 
 
1682
1683	if (args->buffer_count < 1) {
1684		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1685		return -EINVAL;
1686	}
1687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1688	/* Copy in the exec list from userland */
1689	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1690	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
 
 
1691	if (exec_list == NULL || exec2_list == NULL) {
1692		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1693			  args->buffer_count);
1694		drm_free_large(exec_list);
1695		drm_free_large(exec2_list);
1696		return -ENOMEM;
1697	}
1698	ret = copy_from_user(exec_list,
1699			     to_user_ptr(args->buffers_ptr),
1700			     sizeof(*exec_list) * args->buffer_count);
1701	if (ret != 0) {
1702		DRM_DEBUG("copy %d exec entries failed %d\n",
1703			  args->buffer_count, ret);
1704		drm_free_large(exec_list);
1705		drm_free_large(exec2_list);
1706		return -EFAULT;
1707	}
1708
	/* Widen every legacy entry into its exec_object2 equivalent. */
1709	for (i = 0; i < args->buffer_count; i++) {
1710		exec2_list[i].handle = exec_list[i].handle;
1711		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1712		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1713		exec2_list[i].alignment = exec_list[i].alignment;
1714		exec2_list[i].offset = exec_list[i].offset;
1715		if (INTEL_INFO(dev)->gen < 4)
1716			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1717		else
1718			exec2_list[i].flags = 0;
1719	}
1720
	/* Build the execbuffer2 request; the legacy ioctl always targets the render ring. */
1721	exec2.buffers_ptr = args->buffers_ptr;
1722	exec2.buffer_count = args->buffer_count;
1723	exec2.batch_start_offset = args->batch_start_offset;
1724	exec2.batch_len = args->batch_len;
1725	exec2.DR1 = args->DR1;
1726	exec2.DR4 = args->DR4;
1727	exec2.num_cliprects = args->num_cliprects;
1728	exec2.cliprects_ptr = args->cliprects_ptr;
1729	exec2.flags = I915_EXEC_RENDER;
1730	i915_execbuffer2_set_context_id(exec2, 0);
1731
1732	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1733	if (!ret) {
1734		struct drm_i915_gem_exec_object __user *user_exec_list =
1735			to_user_ptr(args->buffers_ptr);
1736
1737		/* Copy the new buffer offsets back to the user's exec list. */
1738		for (i = 0; i < args->buffer_count; i++) {
 
 
 
1739			exec2_list[i].offset =
1740				gen8_canonical_addr(exec2_list[i].offset);
1741			ret = __copy_to_user(&user_exec_list[i].offset,
1742					     &exec2_list[i].offset,
1743					     sizeof(user_exec_list[i].offset));
			/* Note: ret is already -EFAULT by the time the message below prints it. */
1744			if (ret) {
1745				ret = -EFAULT;
1746				DRM_DEBUG("failed to copy %d exec entries "
1747					  "back to user (%d)\n",
1748					  args->buffer_count, ret);
1749				break;
1750			}
1751		}
1752	}
1753
1754	drm_free_large(exec_list);
1755	drm_free_large(exec2_list);
1756	return ret;
1757}
1758
1759int
1760i915_gem_execbuffer2(struct drm_device *dev, void *data,
1761		     struct drm_file *file)
1762{
1763	struct drm_i915_gem_execbuffer2 *args = data;
1764	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1765	int ret;
 
 
1766
1767	if (args->buffer_count < 1 ||
1768	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1769		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1770		return -EINVAL;
1771	}
1772
1773	if (args->rsvd2 != 0) {
1774		DRM_DEBUG("dirty rvsd2 field\n");
1775		return -EINVAL;
1776	}
1777
1778	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1779			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1780	if (exec2_list == NULL)
1781		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1782					   args->buffer_count);
1783	if (exec2_list == NULL) {
1784		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1785			  args->buffer_count);
1786		return -ENOMEM;
1787	}
1788	ret = copy_from_user(exec2_list,
1789			     to_user_ptr(args->buffers_ptr),
1790			     sizeof(*exec2_list) * args->buffer_count);
1791	if (ret != 0) {
1792		DRM_DEBUG("copy %d exec entries failed %d\n",
1793			  args->buffer_count, ret);
1794		drm_free_large(exec2_list);
1795		return -EFAULT;
1796	}
1797
1798	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1799	if (!ret) {
1800		/* Copy the new buffer offsets back to the user's exec list. */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1801		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1802				   to_user_ptr(args->buffers_ptr);
1803		int i;
1804
 
 
1805		for (i = 0; i < args->buffer_count; i++) {
 
 
 
1806			exec2_list[i].offset =
1807				gen8_canonical_addr(exec2_list[i].offset);
1808			ret = __copy_to_user(&user_exec_list[i].offset,
1809					     &exec2_list[i].offset,
1810					     sizeof(user_exec_list[i].offset));
1811			if (ret) {
1812				ret = -EFAULT;
1813				DRM_DEBUG("failed to copy %d exec entries "
1814					  "back to user\n",
1815					  args->buffer_count);
1816				break;
1817			}
1818		}
1819	}
1820
1821	drm_free_large(exec2_list);
1822	return ret;
 
 
1823}

   1/*
   2 * Copyright © 2008,2010 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *    Chris Wilson <chris@chris-wilson.co.uk>
  26 *
  27 */
  28
  29#include <linux/dma_remapping.h>
  30#include <linux/reservation.h>
  31#include <linux/sync_file.h>
  32#include <linux/uaccess.h>
  33
  34#include <drm/drmP.h>
  35#include <drm/drm_syncobj.h>
  36#include <drm/i915_drm.h>
  37
  38#include "i915_drv.h"
  39#include "i915_gem_clflush.h"
  40#include "i915_trace.h"
  41#include "intel_drv.h"
  42#include "intel_frontbuffer.h"
 
  43
  44enum {
	/*
	 * Values for DBG_FORCE_RELOC below. NOTE(review): presumably each
	 * forces one relocation method for debugging - confirm at the users.
	 */
  45	FORCE_CPU_RELOC = 1,
  46	FORCE_GTT_RELOC,
  47	FORCE_GPU_RELOC,
  48#define DBG_FORCE_RELOC 0 /* choose one of the above! */
  49};
  50
/*
 * Driver-internal per-object flag bits, allocated downwards from bit 31.
 * __EXEC_OBJECT_INTERNAL_FLAGS is every bit from 27 upwards, i.e. exactly
 * the five flags defined here; __EXEC_OBJECT_RESERVED is the pin and fence
 * pair.
 */
  51#define __EXEC_OBJECT_HAS_REF		BIT(31)
  52#define __EXEC_OBJECT_HAS_PIN		BIT(30)
  53#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
  54#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
  55#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
  56#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
  57#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
  58
/*
 * Driver-internal execbuffer-wide flag bits; __EXEC_INTERNAL_FLAGS covers
 * bits 30 and 31. UPDATE is a local alias for PIN_OFFSET_FIXED.
 */
  59#define __EXEC_HAS_RELOC	BIT(31)
  60#define __EXEC_VALIDATED	BIT(30)
  61#define __EXEC_INTERNAL_FLAGS	(~0u << 30)
  62#define UPDATE			PIN_OFFSET_FIXED
  63
  64#define BATCH_OFFSET_BIAS (256*1024)
  65
/* The unknown flag bits combined with the whole I915_EXEC_CONSTANTS_MASK field. */
  66#define __I915_EXEC_ILLEGAL_FLAGS \
  67	(__I915_EXEC_UNKNOWN_FLAGS | I915_EXEC_CONSTANTS_MASK)
  68
  69/**
  70 * DOC: User command execution
  71 *
  72 * Userspace submits commands to be executed on the GPU as an instruction
  73 * stream within a GEM object we call a batchbuffer. These instructions may
  74 * refer to other GEM objects containing auxiliary state such as kernels,
  75 * samplers, render targets and even secondary batchbuffers. Userspace does
  76 * not know where in the GPU memory these objects reside and so before the
  77 * batchbuffer is passed to the GPU for execution, those addresses in the
  78 * batchbuffer and auxiliary objects are updated. This is known as relocation,
  79 * or patching. To try and avoid having to relocate each object on the next
  80 * execution, userspace is told the location of those objects in this pass,
  81 * but this remains just a hint as the kernel may choose a new location for
  82 * any object in the future.
  83 *
  84 * Processing an execbuf ioctl is conceptually split up into a few phases.
  85 *
  86 * 1. Validation - Ensure all the pointers, handles and flags are valid.
  87 * 2. Reservation - Assign GPU address space for every object
  88 * 3. Relocation - Update any addresses to point to the final locations
  89 * 4. Serialisation - Order the request with respect to its dependencies
  90 * 5. Construction - Construct a request to execute the batchbuffer
  91 * 6. Submission (at some point in the future execution)
  92 *
  93 * Reserving resources for the execbuf is the most complicated phase. We
  94 * neither want to have to migrate the object in the address space, nor do
  95 * we want to have to update any relocations pointing to this object. Ideally,
  96 * we want to leave the object where it is and for all the existing relocations
  97 * to match. If the object is given a new address, or if userspace thinks the
  98 * object is elsewhere, we have to parse all the relocation entries and update
  99 * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
 100 * all the target addresses in all of its objects match the value in the
 101 * relocation entries and that they all match the presumed offsets given by the
 102 * list of execbuffer objects. Using this knowledge, we know that if we haven't
 103 * moved any buffers, all the relocation entries are valid and we can skip
 104 * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
 105 * hang.) The requirements for using I915_EXEC_NO_RELOC are:
 106 *
 107 *      The addresses written in the objects must match the corresponding
 108 *      reloc.presumed_offset which in turn must match the corresponding
 109 *      execobject.offset.
 110 *
 111 *      Any render targets written to in the batch must be flagged with
 112 *      EXEC_OBJECT_WRITE.
 113 *
 114 *      To avoid stalling, execobject.offset should match the current
 115 *      address of that object within the active context.
 116 *
 117 * The reservation is done in multiple phases. First we try and keep any
 118 * object already bound in its current location - so long as it meets the
 119 * constraints imposed by the new execbuffer. Any object left unbound after the
 120 * first pass is then fitted into any available idle space. If an object does
 121 * not fit, all objects are removed from the reservation and the process rerun
 122 * after sorting the objects into a priority order (more difficult to fit
 123 * objects are tried first). Failing that, the entire VM is cleared and we try
 124 * to fit the execbuf one last time before concluding that it simply will not
 125 * fit.
 126 *
 127 * A small complication to all of this is that we allow userspace not only to
 128 * specify an alignment and a size for the object in the address space, but
 129 * we also allow userspace to specify the exact offset. These objects are
 130 * simpler to place (the location is known a priori) all we have to do is make
 131 * sure the space is available.
 132 *
 133 * Once all the objects are in place, patching up the buried pointers to point
 134 * to the final locations is a fairly simple job of walking over the relocation
 135 * entry arrays, looking up the right address and rewriting the value into
 136 * the object. Simple! ... The relocation entries are stored in user memory
 137 * and so to access them we have to copy them into a local buffer. That copy
 138 * has to avoid taking any pagefaults as they may lead back to a GEM object
 139 * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
 140 * the relocation into multiple passes. First we try to do everything within an
 141 * atomic context (avoid the pagefaults) which requires that we never wait. If
 142 * we detect that we may wait, or if we need to fault, then we have to fallback
 143 * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
 144 * bells yet?) Dropping the mutex means that we lose all the state we have
 145 * built up so far for the execbuf and we must reset any global data. However,
 146 * we do leave the objects pinned in their final locations - which is a
 147 * potential issue for concurrent execbufs. Once we have left the mutex, we can
 148 * allocate and copy all the relocation entries into a large array at our
 149 * leisure, reacquire the mutex, reclaim all the objects and other state and
 150 * then proceed to update any incorrect addresses with the objects.
 151 *
 152 * As we process the relocation entries, we maintain a record of whether the
 153 * object is being written to. Using NORELOC, we expect userspace to provide
 154 * this information instead. We also check whether we can skip the relocation
 155 * by comparing the expected value inside the relocation entry with the target's
 156 * final address. If they differ, we have to map the current object and rewrite
 157 * the 4 or 8 byte pointer within.
 158 *
 159 * Serialising an execbuf is quite simple according to the rules of the GEM
 160 * ABI. Execution within each context is ordered by the order of submission.
 161 * Writes to any GEM object are in order of submission and are exclusive. Reads
 162 * from a GEM object are unordered with respect to other reads, but ordered by
 163 * writes. A write submitted after a read cannot occur before the read, and
 164 * similarly any read submitted after a write cannot occur before the write.
 165 * Writes are ordered between engines such that only one write occurs at any
 166 * time (completing any reads beforehand) - using semaphores where available
 167 * and CPU serialisation otherwise. Other GEM access obey the same rules, any
 168 * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
 169 * reads before starting, and any read (either using set-domain or pread) must
 170 * flush all GPU writes before starting. (Note we only employ a barrier before,
 171 * we currently rely on userspace not concurrently starting a new execution
 172 * whilst reading or writing to an object. This may be an advantage or not
 173 * depending on how much you trust userspace not to shoot themselves in the
 174 * foot.) Serialisation may just result in the request being inserted into
 175 * a DAG awaiting its turn, but most simple is to wait on the CPU until
 176 * all dependencies are resolved.
 177 *
 178 * After all of that, it is just a matter of closing the request and handing it to
 179 * the hardware (well, leaving it in a queue to be executed). However, we also
 180 * offer the ability for batchbuffers to be run with elevated privileges so
 181 * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
 182 * Before any batch is given extra privileges we first must check that it
 183 * contains no nefarious instructions, we check that each instruction is from
 184 * our whitelist and all registers are also from an allowed list. We first
 185 * copy the user's batchbuffer to a shadow (so that the user doesn't have
 186 * access to it, either by the CPU or GPU as we scan it) and then parse each
 187 * instruction. If everything is ok, we set a flag telling the hardware to run
 188 * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
 189 */
 190
 191struct i915_execbuffer {
 192	struct drm_i915_private *i915; /** i915 backpointer */
 193	struct drm_file *file; /** per-file lookup tables and limits */
 194	struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
 195	struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
 196	struct i915_vma **vma;
 197	unsigned int *flags;
 198
 199	struct intel_engine_cs *engine; /** engine to queue the request to */
 200	struct i915_gem_context *ctx; /** context for building the request */
 201	struct i915_address_space *vm; /** GTT and vma for the request */
 202
 203	struct i915_request *request; /** our request to build */
 204	struct i915_vma *batch; /** identity of the batch obj/vma */
 205
 206	/** actual size of execobj[] as we may extend it for the cmdparser */
 207	unsigned int buffer_count;
 208
 209	/** list of vma not yet bound during reservation phase */
 210	struct list_head unbound;
 211
 212	/** list of vma that have execobj.relocation_count */
 213	struct list_head relocs;
 214
 215	/**
 216	 * Track the most recently used object for relocations, as we
 217	 * frequently have to perform multiple relocations within the same
 218	 * obj/page
 219	 */
 220	struct reloc_cache {
 221		struct drm_mm_node node; /** temporary GTT binding */
 222		unsigned long vaddr; /** Current kmap address */
 223		unsigned long page; /** Currently mapped page index */
 224		unsigned int gen; /** Cached value of INTEL_GEN */
 225		bool use_64bit_reloc : 1;
 226		bool has_llc : 1;
 227		bool has_fence : 1;
 228		bool needs_unfenced : 1;
 229
 230		struct i915_request *rq;
 231		u32 *rq_cmd;
 232		unsigned int rq_size;
 233	} reloc_cache;
 234
 235	u64 invalid_flags; /** Set of execobj.flags that are invalid */
 236	u32 context_flags; /** Set of execobj.flags to insert from the ctx */
 237
 238	u32 batch_start_offset; /** Location within object of batch */
 239	u32 batch_len; /** Length of batch within object */
 240	u32 batch_flags; /** Flags composed for emit_bb_start() */
 241
 242	/**
 243	 * Indicate either the size of the hastable used to resolve
 244	 * relocation handles, or if negative that we are using a direct
 245	 * index into the execobj[].
 246	 */
 247	int lut_size;
 248	struct hlist_head *buckets; /** ht for relocation handles */
 249};
 250
 251#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
 252
 253/*
 254 * Used to convert any address to canonical form.
 255 * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
 256 * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
 257 * addresses to be in a canonical form:
 258 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 259 * canonical form [63:48] == [47]."
 260 */
 261#define GEN8_HIGH_ADDRESS_BIT 47
 262static inline u64 gen8_canonical_addr(u64 address)
 263{
 264	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
 265}
 266
 267static inline u64 gen8_noncanonical_addr(u64 address)
 268{
 269	return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
 270}
 271
 272static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 273{
 274	return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
 275}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 276
 277static int eb_create(struct i915_execbuffer *eb)
 278{
 279	if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
 280		unsigned int size = 1 + ilog2(eb->buffer_count);
 281
 282		/*
 283		 * Without a 1:1 association between relocation handles and
 284		 * the execobject[] index, we instead create a hashtable.
 285		 * We size it dynamically based on available memory, starting
 286		 * first with 1:1 assocative hash and scaling back until
 287		 * the allocation succeeds.
 288		 *
 289		 * Later on we use a positive lut_size to indicate we are
 290		 * using this hashtable, and a negative value to indicate a
 291		 * direct lookup.
 292		 */
 293		do {
 294			gfp_t flags;
 295
 296			/* While we can still reduce the allocation size, don't
 297			 * raise a warning and allow the allocation to fail.
 298			 * On the last pass though, we want to try as hard
 299			 * as possible to perform the allocation and warn
 300			 * if it fails.
 301			 */
 302			flags = GFP_KERNEL;
 303			if (size > 1)
 304				flags |= __GFP_NORETRY | __GFP_NOWARN;
 305
 306			eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
 307					      flags);
 308			if (eb->buckets)
 309				break;
 310		} while (--size);
 311
 312		if (unlikely(!size))
 313			return -ENOMEM;
 314
 315		eb->lut_size = size;
 316	} else {
 317		eb->lut_size = -eb->buffer_count;
 318	}
 319
 320	return 0;
 321}
 322
 323static bool
 324eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
 325		 const struct i915_vma *vma,
 326		 unsigned int flags)
 327{
 328	if (vma->node.size < entry->pad_to_size)
 329		return true;
 330
 331	if (entry->alignment && !IS_ALIGNED(vma->node.start, entry->alignment))
 332		return true;
 333
 334	if (flags & EXEC_OBJECT_PINNED &&
 335	    vma->node.start != entry->offset)
 336		return true;
 337
 338	if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
 339	    vma->node.start < BATCH_OFFSET_BIAS)
 340		return true;
 341
 342	if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
 343	    (vma->node.start + vma->node.size - 1) >> 32)
 344		return true;
 345
 346	if (flags & __EXEC_OBJECT_NEEDS_MAP &&
 347	    !i915_vma_is_map_and_fenceable(vma))
 348		return true;
 349
 350	return false;
 351}
 352
 353static inline bool
 354eb_pin_vma(struct i915_execbuffer *eb,
 355	   const struct drm_i915_gem_exec_object2 *entry,
 356	   struct i915_vma *vma)
 357{
 358	unsigned int exec_flags = *vma->exec_flags;
 359	u64 pin_flags;
 360
 361	if (vma->node.size)
 362		pin_flags = vma->node.start;
 363	else
 364		pin_flags = entry->offset & PIN_OFFSET_MASK;
 365
 366	pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
 367	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_GTT))
 368		pin_flags |= PIN_GLOBAL;
 369
 370	if (unlikely(i915_vma_pin(vma, 0, 0, pin_flags)))
 371		return false;
 372
 373	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 374		if (unlikely(i915_vma_pin_fence(vma))) {
 375			i915_vma_unpin(vma);
 376			return false;
 377		}
 378
 379		if (vma->fence)
 380			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 381	}
 382
 383	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 384	return !eb_vma_misplaced(entry, vma, exec_flags);
 385}
 386
 387static inline void __eb_unreserve_vma(struct i915_vma *vma, unsigned int flags)
 388{
 389	GEM_BUG_ON(!(flags & __EXEC_OBJECT_HAS_PIN));
 390
 391	if (unlikely(flags & __EXEC_OBJECT_HAS_FENCE))
 392		__i915_vma_unpin_fence(vma);
 393
 394	__i915_vma_unpin(vma);
 395}
 396
 397static inline void
 398eb_unreserve_vma(struct i915_vma *vma, unsigned int *flags)
 399{
 400	if (!(*flags & __EXEC_OBJECT_HAS_PIN))
 401		return;
 402
 403	__eb_unreserve_vma(vma, *flags);
 404	*flags &= ~__EXEC_OBJECT_RESERVED;
 405}
 406
 407static int
 408eb_validate_vma(struct i915_execbuffer *eb,
 409		struct drm_i915_gem_exec_object2 *entry,
 410		struct i915_vma *vma)
 
 
 411{
 412	if (unlikely(entry->flags & eb->invalid_flags))
 413		return -EINVAL;
 
 414
 415	if (unlikely(entry->alignment && !is_power_of_2(entry->alignment)))
 416		return -EINVAL;
 417
 418	/*
 419	 * Offset can be used as input (EXEC_OBJECT_PINNED), reject
 420	 * any non-page-aligned or non-canonical addresses.
 421	 */
 422	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
 423		     entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
 424		return -EINVAL;
 425
 426	/* pad_to_size was once a reserved field, so sanitize it */
 427	if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
 428		if (unlikely(offset_in_page(entry->pad_to_size)))
 429			return -EINVAL;
 430	} else {
 431		entry->pad_to_size = 0;
 432	}
 433
 434	if (unlikely(vma->exec_flags)) {
 435		DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
 436			  entry->handle, (int)(entry - eb->exec));
 437		return -EINVAL;
 438	}
 439
 440	/*
 441	 * From drm_mm perspective address space is continuous,
 442	 * so from this point we're always using non-canonical
 443	 * form internally.
 444	 */
 445	entry->offset = gen8_noncanonical_addr(entry->offset);
 446
 447	if (!eb->reloc_cache.has_fence) {
 448		entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 449	} else {
 450		if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
 451		     eb->reloc_cache.needs_unfenced) &&
 452		    i915_gem_object_is_tiled(vma->obj))
 453			entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
 454	}
 455
 456	if (!(entry->flags & EXEC_OBJECT_PINNED))
 457		entry->flags |= eb->context_flags;
 458
 459	return 0;
 460}
 461
 462static int
 463eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 464{
 465	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 466	int err;
 467
 468	GEM_BUG_ON(i915_vma_is_closed(vma));
 469
 470	if (!(eb->args->flags & __EXEC_VALIDATED)) {
 471		err = eb_validate_vma(eb, entry, vma);
 472		if (unlikely(err))
 473			return err;
 474	}
 475
 476	if (eb->lut_size > 0) {
 477		vma->exec_handle = entry->handle;
 478		hlist_add_head(&vma->exec_node,
 479			       &eb->buckets[hash_32(entry->handle,
 480						    eb->lut_size)]);
 481	}
 482
 483	if (entry->relocation_count)
 484		list_add_tail(&vma->reloc_link, &eb->relocs);
 485
 486	/*
 487	 * Stash a pointer from the vma to execobj, so we can query its flags,
 488	 * size, alignment etc as provided by the user. Also we stash a pointer
 489	 * to the vma inside the execobj so that we can use a direct lookup
 490	 * to find the right target VMA when doing relocations.
 491	 */
 492	eb->vma[i] = vma;
 493	eb->flags[i] = entry->flags;
 494	vma->exec_flags = &eb->flags[i];
 495
 496	err = 0;
 497	if (eb_pin_vma(eb, entry, vma)) {
 498		if (entry->offset != vma->node.start) {
 499			entry->offset = vma->node.start | UPDATE;
 500			eb->args->flags |= __EXEC_HAS_RELOC;
 501		}
 502	} else {
 503		eb_unreserve_vma(vma, vma->exec_flags);
 504
 505		list_add_tail(&vma->exec_link, &eb->unbound);
 506		if (drm_mm_node_allocated(&vma->node))
 507			err = i915_vma_unbind(vma);
 508		if (unlikely(err))
 509			vma->exec_flags = NULL;
 510	}
 511	return err;
 512}
 513
 514static inline int use_cpu_reloc(const struct reloc_cache *cache,
 515				const struct drm_i915_gem_object *obj)
 516{
 517	if (!i915_gem_object_has_struct_page(obj))
 518		return false;
 519
 520	if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
 521		return true;
 522
 523	if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
 524		return false;
 525
 526	return (cache->has_llc ||
 527		obj->cache_dirty ||
 528		obj->cache_level != I915_CACHE_NONE);
 529}
 530
 531static int eb_reserve_vma(const struct i915_execbuffer *eb,
 532			  struct i915_vma *vma)
 533{
 534	struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
 535	unsigned int exec_flags = *vma->exec_flags;
 536	u64 pin_flags;
 537	int err;
 538
 539	pin_flags = PIN_USER | PIN_NONBLOCK;
 540	if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
 541		pin_flags |= PIN_GLOBAL;
 542
 543	/*
 544	 * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
 545	 * limit address to the first 4GBs for unflagged objects.
 546	 */
 547	if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
 548		pin_flags |= PIN_ZONE_4G;
 549
 550	if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
 551		pin_flags |= PIN_MAPPABLE;
 552
 553	if (exec_flags & EXEC_OBJECT_PINNED) {
 554		pin_flags |= entry->offset | PIN_OFFSET_FIXED;
 555		pin_flags &= ~PIN_NONBLOCK; /* force overlapping checks */
 556	} else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS) {
 557		pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
 558	}
 559
 560	err = i915_vma_pin(vma,
 561			   entry->pad_to_size, entry->alignment,
 562			   pin_flags);
 563	if (err)
 564		return err;
 565
 566	if (entry->offset != vma->node.start) {
 567		entry->offset = vma->node.start | UPDATE;
 568		eb->args->flags |= __EXEC_HAS_RELOC;
 569	}
 570
 571	if (unlikely(exec_flags & EXEC_OBJECT_NEEDS_FENCE)) {
 572		err = i915_vma_pin_fence(vma);
 573		if (unlikely(err)) {
 574			i915_vma_unpin(vma);
 575			return err;
 576		}
 577
 578		if (vma->fence)
 579			exec_flags |= __EXEC_OBJECT_HAS_FENCE;
 580	}
 
 581
 582	*vma->exec_flags = exec_flags | __EXEC_OBJECT_HAS_PIN;
 583	GEM_BUG_ON(eb_vma_misplaced(entry, vma, exec_flags));
 
 584
 585	return 0;
 586}
 
 587
 588static int eb_reserve(struct i915_execbuffer *eb)
 589{
 590	const unsigned int count = eb->buffer_count;
 591	struct list_head last;
 592	struct i915_vma *vma;
 593	unsigned int i, pass;
 594	int err;
 595
 596	/*
 597	 * Attempt to pin all of the buffers into the GTT.
 598	 * This is done in 3 phases:
 599	 *
 600	 * 1a. Unbind all objects that do not match the GTT constraints for
 601	 *     the execbuffer (fenceable, mappable, alignment etc).
 602	 * 1b. Increment pin count for already bound objects.
 603	 * 2.  Bind new objects.
 604	 * 3.  Decrement pin count.
 605	 *
 606	 * This avoid unnecessary unbinding of later objects in order to make
 607	 * room for the earlier objects *unless* we need to defragment.
 608	 */
 609
 610	pass = 0;
 611	err = 0;
 612	do {
 613		list_for_each_entry(vma, &eb->unbound, exec_link) {
 614			err = eb_reserve_vma(eb, vma);
 615			if (err)
 616				break;
 617		}
 618		if (err != -ENOSPC)
 619			return err;
 620
 621		/* Resort *all* the objects into priority order */
 622		INIT_LIST_HEAD(&eb->unbound);
 623		INIT_LIST_HEAD(&last);
 624		for (i = 0; i < count; i++) {
 625			unsigned int flags = eb->flags[i];
 626			struct i915_vma *vma = eb->vma[i];
 627
 628			if (flags & EXEC_OBJECT_PINNED &&
 629			    flags & __EXEC_OBJECT_HAS_PIN)
 630				continue;
 631
 632			eb_unreserve_vma(vma, &eb->flags[i]);
 633
 634			if (flags & EXEC_OBJECT_PINNED)
 635				list_add(&vma->exec_link, &eb->unbound);
 636			else if (flags & __EXEC_OBJECT_NEEDS_MAP)
 637				list_add_tail(&vma->exec_link, &eb->unbound);
 638			else
 639				list_add_tail(&vma->exec_link, &last);
 640		}
 641		list_splice_tail(&last, &eb->unbound);
 642
 643		switch (pass++) {
 644		case 0:
 645			break;
 646
 647		case 1:
 648			/* Too fragmented, unbind everything and retry */
 649			err = i915_gem_evict_vm(eb->vm);
 650			if (err)
 651				return err;
 652			break;
 653
 654		default:
 655			return -ENOSPC;
 656		}
 657	} while (1);
 658}
 659
 660static unsigned int eb_batch_index(const struct i915_execbuffer *eb)
 661{
 662	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
 663		return 0;
 664	else
 665		return eb->buffer_count - 1;
 666}
 667
 668static int eb_select_context(struct i915_execbuffer *eb)
 669{
 670	struct i915_gem_context *ctx;
 671
 672	ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
 673	if (unlikely(!ctx))
 674		return -ENOENT;
 675
 676	eb->ctx = ctx;
 677	eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base;
 678
 679	eb->context_flags = 0;
 680	if (ctx->flags & CONTEXT_NO_ZEROMAP)
 681		eb->context_flags |= __EXEC_OBJECT_NEEDS_BIAS;
 682
 683	return 0;
 684}
 685
 686static int eb_lookup_vmas(struct i915_execbuffer *eb)
 687{
 688	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 689	struct drm_i915_gem_object *obj;
 690	unsigned int i;
 691	int err;
 692
 693	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
 694		return -ENOENT;
 695
 696	if (unlikely(i915_gem_context_is_banned(eb->ctx)))
 697		return -EIO;
 698
 699	INIT_LIST_HEAD(&eb->relocs);
 700	INIT_LIST_HEAD(&eb->unbound);
 701
 702	for (i = 0; i < eb->buffer_count; i++) {
 703		u32 handle = eb->exec[i].handle;
 704		struct i915_lut_handle *lut;
 705		struct i915_vma *vma;
 706
 707		vma = radix_tree_lookup(handles_vma, handle);
 708		if (likely(vma))
 709			goto add_vma;
 710
 711		obj = i915_gem_object_lookup(eb->file, handle);
 712		if (unlikely(!obj)) {
 713			err = -ENOENT;
 714			goto err_vma;
 715		}
 716
 717		vma = i915_vma_instance(obj, eb->vm, NULL);
 718		if (unlikely(IS_ERR(vma))) {
 719			err = PTR_ERR(vma);
 720			goto err_obj;
 721		}
 722
 723		lut = kmem_cache_alloc(eb->i915->luts, GFP_KERNEL);
 724		if (unlikely(!lut)) {
 725			err = -ENOMEM;
 726			goto err_obj;
 727		}
 728
 729		err = radix_tree_insert(handles_vma, handle, vma);
 730		if (unlikely(err)) {
 731			kmem_cache_free(eb->i915->luts, lut);
 732			goto err_obj;
 733		}
 734
 735		/* transfer ref to ctx */
 736		vma->open_count++;
 737		list_add(&lut->obj_link, &obj->lut_list);
 738		list_add(&lut->ctx_link, &eb->ctx->handles_list);
 739		lut->ctx = eb->ctx;
 740		lut->handle = handle;
 741
 742add_vma:
 743		err = eb_add_vma(eb, i, vma);
 744		if (unlikely(err))
 745			goto err_vma;
 746
 747		GEM_BUG_ON(vma != eb->vma[i]);
 748		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 749	}
 750
 751	/* take note of the batch buffer before we might reorder the lists */
 752	i = eb_batch_index(eb);
 753	eb->batch = eb->vma[i];
 754	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
 755
 756	/*
 757	 * SNA is doing fancy tricks with compressing batch buffers, which leads
 758	 * to negative relocation deltas. Usually that works out ok since the
 759	 * relocate address is still positive, except when the batch is placed
 760	 * very low in the GTT. Ensure this doesn't happen.
 761	 *
 762	 * Note that actual hangs have only been observed on gen7, but for
 763	 * paranoia do it everywhere.
 764	 */
 765	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
 766		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
 767	if (eb->reloc_cache.has_fence)
 768		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
 769
 770	eb->args->flags |= __EXEC_VALIDATED;
 771	return eb_reserve(eb);
 772
 773err_obj:
 774	i915_gem_object_put(obj);
 775err_vma:
 776	eb->vma[i] = NULL;
 777	return err;
 778}
 779
 780static struct i915_vma *
 781eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
 782{
 783	if (eb->lut_size < 0) {
 784		if (handle >= -eb->lut_size)
 785			return NULL;
 786		return eb->vma[handle];
 787	} else {
 788		struct hlist_head *head;
 789		struct i915_vma *vma;
 790
 791		head = &eb->buckets[hash_32(handle, eb->lut_size)];
 792		hlist_for_each_entry(vma, head, exec_node) {
 793			if (vma->exec_handle == handle)
 794				return vma;
 795		}
 796		return NULL;
 797	}
 798}
 799
 800static void eb_release_vmas(const struct i915_execbuffer *eb)
 
 801{
 802	const unsigned int count = eb->buffer_count;
 803	unsigned int i;
 804
 805	for (i = 0; i < count; i++) {
 806		struct i915_vma *vma = eb->vma[i];
 807		unsigned int flags = eb->flags[i];
 808
 809		if (!vma)
 810			break;
 811
 812		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 813		vma->exec_flags = NULL;
 814		eb->vma[i] = NULL;
 815
 816		if (flags & __EXEC_OBJECT_HAS_PIN)
 817			__eb_unreserve_vma(vma, flags);
 818
 819		if (flags & __EXEC_OBJECT_HAS_REF)
 820			i915_vma_put(vma);
 821	}
 822}
 823
 824static void eb_reset_vmas(const struct i915_execbuffer *eb)
 825{
 826	eb_release_vmas(eb);
 827	if (eb->lut_size > 0)
 828		memset(eb->buckets, 0,
 829		       sizeof(struct hlist_head) << eb->lut_size);
 830}
 831
 832static void eb_destroy(const struct i915_execbuffer *eb)
 833{
 834	GEM_BUG_ON(eb->reloc_cache.rq);
 835
 836	if (eb->lut_size > 0)
 837		kfree(eb->buckets);
 
 
 838}
 839
 840static inline u64
 841relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
 842		  const struct i915_vma *target)
 843{
 844	return gen8_canonical_addr((int)reloc->delta + target->node.start);
 
 
 845}
 846
 847static void reloc_cache_init(struct reloc_cache *cache,
 848			     struct drm_i915_private *i915)
 
 
 
 
 
 
 
 849{
 850	cache->page = -1;
 851	cache->vaddr = 0;
 852	/* Must be a variable in the struct to allow GCC to unroll. */
 853	cache->gen = INTEL_GEN(i915);
 854	cache->has_llc = HAS_LLC(i915);
 855	cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
 856	cache->has_fence = cache->gen < 4;
 857	cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
 858	cache->node.allocated = false;
 859	cache->rq = NULL;
 860	cache->rq_size = 0;
 861}
 862
 863static inline void *unmask_page(unsigned long p)
 864{
 865	return (void *)(uintptr_t)(p & PAGE_MASK);
 866}
 867
 868static inline unsigned int unmask_flags(unsigned long p)
 
 
 869{
 870	return p & ~PAGE_MASK;
 871}
 872
 873#define KMAP 0x4 /* after CLFLUSH_FLAGS */
 874
 875static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
 
 876{
 877	struct drm_i915_private *i915 =
 878		container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
 879	return &i915->ggtt;
 880}
 
 881
 882static void reloc_gpu_flush(struct reloc_cache *cache)
 883{
 884	GEM_BUG_ON(cache->rq_size >= cache->rq->batch->obj->base.size / sizeof(u32));
 885	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
 886	i915_gem_object_unpin_map(cache->rq->batch->obj);
 887	i915_gem_chipset_flush(cache->rq->i915);
 888
 889	__i915_request_add(cache->rq, true);
 890	cache->rq = NULL;
 891}
 892
 893static void reloc_cache_reset(struct reloc_cache *cache)
 894{
 895	void *vaddr;
 896
 897	if (cache->rq)
 898		reloc_gpu_flush(cache);
 899
 900	if (!cache->vaddr)
 901		return;
 902
 903	vaddr = unmask_page(cache->vaddr);
 904	if (cache->vaddr & KMAP) {
 905		if (cache->vaddr & CLFLUSH_AFTER)
 906			mb();
 907
 908		kunmap_atomic(vaddr);
 909		i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm);
 910	} else {
 911		wmb();
 912		io_mapping_unmap_atomic((void __iomem *)vaddr);
 913		if (cache->node.allocated) {
 914			struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 915
 916			ggtt->base.clear_range(&ggtt->base,
 917					       cache->node.start,
 918					       cache->node.size);
 919			drm_mm_remove_node(&cache->node);
 920		} else {
 921			i915_vma_unpin((struct i915_vma *)cache->node.mm);
 922		}
 923	}
 924
 925	cache->vaddr = 0;
 926	cache->page = -1;
 927}
 928
 929static void *reloc_kmap(struct drm_i915_gem_object *obj,
 930			struct reloc_cache *cache,
 931			unsigned long page)
 932{
 933	void *vaddr;
 934
 935	if (cache->vaddr) {
 936		kunmap_atomic(unmask_page(cache->vaddr));
 937	} else {
 938		unsigned int flushes;
 939		int err;
 940
 941		err = i915_gem_obj_prepare_shmem_write(obj, &flushes);
 942		if (err)
 943			return ERR_PTR(err);
 944
 945		BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
 946		BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
 947
 948		cache->vaddr = flushes | KMAP;
 949		cache->node.mm = (void *)obj;
 950		if (flushes)
 951			mb();
 952	}
 953
 954	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page));
 955	cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
 956	cache->page = page;
 957
 958	return vaddr;
 959}
 960
 961static void *reloc_iomap(struct drm_i915_gem_object *obj,
 962			 struct reloc_cache *cache,
 963			 unsigned long page)
 964{
 965	struct i915_ggtt *ggtt = cache_to_ggtt(cache);
 966	unsigned long offset;
 967	void *vaddr;
 968
 969	if (cache->vaddr) {
 970		io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
 971	} else {
 972		struct i915_vma *vma;
 973		int err;
 974
 975		if (use_cpu_reloc(cache, obj))
 976			return NULL;
 977
 978		err = i915_gem_object_set_to_gtt_domain(obj, true);
 979		if (err)
 980			return ERR_PTR(err);
 981
 982		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
 983					       PIN_MAPPABLE |
 984					       PIN_NONBLOCK |
 985					       PIN_NONFAULT);
 986		if (IS_ERR(vma)) {
 987			memset(&cache->node, 0, sizeof(cache->node));
 988			err = drm_mm_insert_node_in_range
 989				(&ggtt->base.mm, &cache->node,
 990				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 991				 0, ggtt->mappable_end,
 992				 DRM_MM_INSERT_LOW);
 993			if (err) /* no inactive aperture space, use cpu reloc */
 994				return NULL;
 995		} else {
 996			err = i915_vma_put_fence(vma);
 997			if (err) {
 998				i915_vma_unpin(vma);
 999				return ERR_PTR(err);
1000			}
1001
1002			cache->node.start = vma->node.start;
1003			cache->node.mm = (void *)vma;
1004		}
1005	}
1006
1007	offset = cache->node.start;
1008	if (cache->node.allocated) {
1009		wmb();
1010		ggtt->base.insert_page(&ggtt->base,
1011				       i915_gem_object_get_dma_address(obj, page),
1012				       offset, I915_CACHE_NONE, 0);
1013	} else {
1014		offset += page << PAGE_SHIFT;
1015	}
1016
1017	vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
1018							 offset);
1019	cache->page = page;
1020	cache->vaddr = (unsigned long)vaddr;
1021
1022	return vaddr;
1023}
1024
1025static void *reloc_vaddr(struct drm_i915_gem_object *obj,
1026			 struct reloc_cache *cache,
1027			 unsigned long page)
1028{
1029	void *vaddr;
1030
1031	if (cache->page == page) {
1032		vaddr = unmask_page(cache->vaddr);
1033	} else {
1034		vaddr = NULL;
1035		if ((cache->vaddr & KMAP) == 0)
1036			vaddr = reloc_iomap(obj, cache, page);
1037		if (!vaddr)
1038			vaddr = reloc_kmap(obj, cache, page);
1039	}
1040
1041	return vaddr;
1042}
1043
1044static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
 
 
 
1045{
1046	if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
1047		if (flushes & CLFLUSH_BEFORE) {
1048			clflushopt(addr);
1049			mb();
1050		}
1051
1052		*addr = value;
 
 
1053
1054		/*
1055		 * Writes to the same cacheline are serialised by the CPU
1056		 * (including clflush). On the write path, we only require
1057		 * that it hits memory in an orderly fashion and place
1058		 * mb barriers at the start and end of the relocation phase
1059		 * to ensure ordering of clflush wrt to the system.
1060		 */
1061		if (flushes & CLFLUSH_AFTER)
1062			clflushopt(addr);
1063	} else
1064		*addr = value;
1065}
1066
1067static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
1068			     struct i915_vma *vma,
1069			     unsigned int len)
1070{
1071	struct reloc_cache *cache = &eb->reloc_cache;
1072	struct drm_i915_gem_object *obj;
1073	struct i915_request *rq;
1074	struct i915_vma *batch;
1075	u32 *cmd;
1076	int err;
1077
1078	GEM_BUG_ON(vma->obj->write_domain & I915_GEM_DOMAIN_CPU);
1079
1080	obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, PAGE_SIZE);
1081	if (IS_ERR(obj))
1082		return PTR_ERR(obj);
1083
1084	cmd = i915_gem_object_pin_map(obj,
1085				      cache->has_llc ?
1086				      I915_MAP_FORCE_WB :
1087				      I915_MAP_FORCE_WC);
1088	i915_gem_object_unpin_pages(obj);
1089	if (IS_ERR(cmd))
1090		return PTR_ERR(cmd);
1091
1092	err = i915_gem_object_set_to_wc_domain(obj, false);
1093	if (err)
1094		goto err_unmap;
1095
1096	batch = i915_vma_instance(obj, vma->vm, NULL);
1097	if (IS_ERR(batch)) {
1098		err = PTR_ERR(batch);
1099		goto err_unmap;
1100	}
1101
1102	err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK);
1103	if (err)
1104		goto err_unmap;
1105
1106	rq = i915_request_alloc(eb->engine, eb->ctx);
1107	if (IS_ERR(rq)) {
1108		err = PTR_ERR(rq);
1109		goto err_unpin;
1110	}
1111
1112	err = i915_request_await_object(rq, vma->obj, true);
1113	if (err)
1114		goto err_request;
1115
1116	err = eb->engine->emit_bb_start(rq,
1117					batch->node.start, PAGE_SIZE,
1118					cache->gen > 5 ? 0 : I915_DISPATCH_SECURE);
1119	if (err)
1120		goto err_request;
1121
1122	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
1123	i915_vma_move_to_active(batch, rq, 0);
1124	reservation_object_lock(batch->resv, NULL);
1125	reservation_object_add_excl_fence(batch->resv, &rq->fence);
1126	reservation_object_unlock(batch->resv);
1127	i915_vma_unpin(batch);
1128
1129	i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1130	reservation_object_lock(vma->resv, NULL);
1131	reservation_object_add_excl_fence(vma->resv, &rq->fence);
1132	reservation_object_unlock(vma->resv);
1133
1134	rq->batch = batch;
1135
1136	cache->rq = rq;
1137	cache->rq_cmd = cmd;
1138	cache->rq_size = 0;
1139
1140	/* Return with batch mapping (cmd) still pinned */
1141	return 0;
1142
1143err_request:
1144	i915_request_add(rq);
1145err_unpin:
1146	i915_vma_unpin(batch);
1147err_unmap:
1148	i915_gem_object_unpin_map(obj);
1149	return err;
1150}
1151
1152static u32 *reloc_gpu(struct i915_execbuffer *eb,
1153		      struct i915_vma *vma,
1154		      unsigned int len)
1155{
1156	struct reloc_cache *cache = &eb->reloc_cache;
1157	u32 *cmd;
1158
1159	if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1))
1160		reloc_gpu_flush(cache);
1161
1162	if (unlikely(!cache->rq)) {
1163		int err;
1164
1165		/* If we need to copy for the cmdparser, we will stall anyway */
1166		if (eb_use_cmdparser(eb))
1167			return ERR_PTR(-EWOULDBLOCK);
1168
1169		if (!intel_engine_can_store_dword(eb->engine))
1170			return ERR_PTR(-ENODEV);
1171
1172		err = __reloc_gpu_alloc(eb, vma, len);
1173		if (unlikely(err))
1174			return ERR_PTR(err);
1175	}
1176
1177	cmd = cache->rq_cmd + cache->rq_size;
1178	cache->rq_size += len;
1179
1180	return cmd;
1181}
1182
1183static u64
1184relocate_entry(struct i915_vma *vma,
1185	       const struct drm_i915_gem_relocation_entry *reloc,
1186	       struct i915_execbuffer *eb,
1187	       const struct i915_vma *target)
1188{
1189	u64 offset = reloc->offset;
1190	u64 target_offset = relocation_target(reloc, target);
1191	bool wide = eb->reloc_cache.use_64bit_reloc;
1192	void *vaddr;
 
1193
1194	if (!eb->reloc_cache.vaddr &&
1195	    (DBG_FORCE_RELOC == FORCE_GPU_RELOC ||
1196	     !reservation_object_test_signaled_rcu(vma->resv, true))) {
1197		const unsigned int gen = eb->reloc_cache.gen;
1198		unsigned int len;
1199		u32 *batch;
1200		u64 addr;
1201
1202		if (wide)
1203			len = offset & 7 ? 8 : 5;
1204		else if (gen >= 4)
1205			len = 4;
1206		else
1207			len = 3;
1208
1209		batch = reloc_gpu(eb, vma, len);
1210		if (IS_ERR(batch))
1211			goto repeat;
1212
1213		addr = gen8_canonical_addr(vma->node.start + offset);
1214		if (wide) {
1215			if (offset & 7) {
1216				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1217				*batch++ = lower_32_bits(addr);
1218				*batch++ = upper_32_bits(addr);
1219				*batch++ = lower_32_bits(target_offset);
1220
1221				addr = gen8_canonical_addr(addr + 4);
1222
1223				*batch++ = MI_STORE_DWORD_IMM_GEN4;
1224				*batch++ = lower_32_bits(addr);
1225				*batch++ = upper_32_bits(addr);
1226				*batch++ = upper_32_bits(target_offset);
1227			} else {
1228				*batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
1229				*batch++ = lower_32_bits(addr);
1230				*batch++ = upper_32_bits(addr);
1231				*batch++ = lower_32_bits(target_offset);
1232				*batch++ = upper_32_bits(target_offset);
1233			}
1234		} else if (gen >= 6) {
1235			*batch++ = MI_STORE_DWORD_IMM_GEN4;
1236			*batch++ = 0;
1237			*batch++ = addr;
1238			*batch++ = target_offset;
1239		} else if (gen >= 4) {
1240			*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1241			*batch++ = 0;
1242			*batch++ = addr;
1243			*batch++ = target_offset;
1244		} else {
1245			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
1246			*batch++ = addr;
1247			*batch++ = target_offset;
1248		}
1249
1250		goto out;
1251	}
1252
1253repeat:
1254	vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
1255	if (IS_ERR(vaddr))
1256		return PTR_ERR(vaddr);
1257
1258	clflush_write32(vaddr + offset_in_page(offset),
1259			lower_32_bits(target_offset),
1260			eb->reloc_cache.vaddr);
1261
1262	if (wide) {
1263		offset += sizeof(u32);
1264		target_offset >>= 32;
1265		wide = false;
1266		goto repeat;
1267	}
1268
1269out:
1270	return target->node.start | UPDATE;
1271}
1272
1273static u64
1274eb_relocate_entry(struct i915_execbuffer *eb,
1275		  struct i915_vma *vma,
1276		  const struct drm_i915_gem_relocation_entry *reloc)
1277{
1278	struct i915_vma *target;
1279	int err;
1280
1281	/* we've already hold a reference to all valid objects */
1282	target = eb_get_vma(eb, reloc->target_handle);
1283	if (unlikely(!target))
1284		return -ENOENT;
1285
1286	/* Validate that the target is in a valid r/w GPU domain */
1287	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
1288		DRM_DEBUG("reloc with multiple write domains: "
1289			  "target %d offset %d "
1290			  "read %08x write %08x",
1291			  reloc->target_handle,
1292			  (int) reloc->offset,
1293			  reloc->read_domains,
1294			  reloc->write_domain);
1295		return -EINVAL;
1296	}
1297	if (unlikely((reloc->write_domain | reloc->read_domains)
1298		     & ~I915_GEM_GPU_DOMAINS)) {
1299		DRM_DEBUG("reloc with read/write non-GPU domains: "
1300			  "target %d offset %d "
1301			  "read %08x write %08x",
1302			  reloc->target_handle,
1303			  (int) reloc->offset,
1304			  reloc->read_domains,
1305			  reloc->write_domain);
1306		return -EINVAL;
1307	}
1308
1309	if (reloc->write_domain) {
1310		*target->exec_flags |= EXEC_OBJECT_WRITE;
1311
1312		/*
1313		 * Sandybridge PPGTT errata: We need a global gtt mapping
1314		 * for MI and pipe_control writes because the gpu doesn't
1315		 * properly redirect them through the ppgtt for non_secure
1316		 * batchbuffers.
1317		 */
1318		if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
1319		    IS_GEN6(eb->i915)) {
1320			err = i915_vma_bind(target, target->obj->cache_level,
1321					    PIN_GLOBAL);
1322			if (WARN_ONCE(err,
1323				      "Unexpected failure to bind target VMA!"))
1324				return err;
1325		}
1326	}
1327
1328	/*
1329	 * If the relocation already has the right value in it, no
1330	 * more work needs to be done.
1331	 */
1332	if (!DBG_FORCE_RELOC &&
1333	    gen8_canonical_addr(target->node.start) == reloc->presumed_offset)
1334		return 0;
1335
1336	/* Check that the relocation address is valid... */
1337	if (unlikely(reloc->offset >
1338		     vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
1339		DRM_DEBUG("Relocation beyond object bounds: "
1340			  "target %d offset %d size %d.\n",
1341			  reloc->target_handle,
1342			  (int)reloc->offset,
1343			  (int)vma->size);
1344		return -EINVAL;
1345	}
1346	if (unlikely(reloc->offset & 3)) {
1347		DRM_DEBUG("Relocation not 4-byte aligned: "
1348			  "target %d offset %d.\n",
1349			  reloc->target_handle,
1350			  (int)reloc->offset);
1351		return -EINVAL;
1352	}
1353
1354	/*
1355	 * If we write into the object, we need to force the synchronisation
1356	 * barrier, either with an asynchronous clflush or if we executed the
1357	 * patching using the GPU (though that should be serialised by the
1358	 * timeline). To be completely sure, and since we are required to
1359	 * do relocations we are already stalling, disable the user's opt
1360	 * out of our synchronisation.
1361	 */
1362	*vma->exec_flags &= ~EXEC_OBJECT_ASYNC;
 
 
 
 
 
 
 
 
1363
1364	/* and update the user's relocation entry */
1365	return relocate_entry(vma, reloc, eb, target);
 
 
1366}
1367
1368static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 
 
1369{
1370#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
1371	struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
1372	struct drm_i915_gem_relocation_entry __user *urelocs;
1373	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1374	unsigned int remain;
 
 
1375
1376	urelocs = u64_to_user_ptr(entry->relocs_ptr);
1377	remain = entry->relocation_count;
1378	if (unlikely(remain > N_RELOC(ULONG_MAX)))
1379		return -EINVAL;
 
 
 
 
1380
1381	/*
1382	 * We must check that the entire relocation array is safe
1383	 * to read. However, if the array is not writable the user loses
1384	 * the updated relocation values.
1385	 */
1386	if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs))))
1387		return -EFAULT;
1388
1389	do {
1390		struct drm_i915_gem_relocation_entry *r = stack;
1391		unsigned int count =
1392			min_t(unsigned int, remain, ARRAY_SIZE(stack));
1393		unsigned int copied;
1394
1395		/*
1396		 * This is the fast path and we cannot handle a pagefault
1397		 * whilst holding the struct mutex lest the user pass in the
1398		 * relocations contained within a mmaped bo. For in such a case
1399		 * we, the page fault handler would call i915_gem_fault() and
1400		 * we would try to acquire the struct mutex again. Obviously
1401		 * this is bad and so lockdep complains vehemently.
1402		 */
1403		pagefault_disable();
1404		copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
1405		pagefault_enable();
1406		if (unlikely(copied)) {
1407			remain = -EFAULT;
1408			goto out;
1409		}
1410
1411		remain -= count;
1412		do {
1413			u64 offset = eb_relocate_entry(eb, vma, r);
 
1414
1415			if (likely(offset == 0)) {
1416			} else if ((s64)offset < 0) {
1417				remain = (int)offset;
1418				goto out;
1419			} else {
1420				/*
1421				 * Note that reporting an error now
1422				 * leaves everything in an inconsistent
1423				 * state as we have *already* changed
1424				 * the relocation value inside the
1425				 * object. As we have not changed the
1426				 * reloc.presumed_offset or will not
1427				 * change the execobject.offset, on the
1428				 * call we may not rewrite the value
1429				 * inside the object, leaving it
1430				 * dangling and causing a GPU hang. Unless
1431				 * userspace dynamically rebuilds the
1432				 * relocations on each execbuf rather than
1433				 * presume a static tree.
1434				 *
1435				 * We did previously check if the relocations
1436				 * were writable (access_ok), an error now
1437				 * would be a strange race with mprotect,
1438				 * having already demonstrated that we
1439				 * can read from this userspace address.
1440				 */
1441				offset = gen8_canonical_addr(offset & ~UPDATE);
1442				__put_user(offset,
1443					   &urelocs[r-stack].presumed_offset);
1444			}
1445		} while (r++, --count);
1446		urelocs += ARRAY_SIZE(stack);
1447	} while (remain);
1448out:
1449	reloc_cache_reset(&eb->reloc_cache);
1450	return remain;
1451}
1452
1453static int
1454eb_relocate_vma_slow(struct i915_execbuffer *eb, struct i915_vma *vma)
 
 
1455{
1456	const struct drm_i915_gem_exec_object2 *entry = exec_entry(eb, vma);
1457	struct drm_i915_gem_relocation_entry *relocs =
1458		u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1459	unsigned int i;
1460	int err;
1461
1462	for (i = 0; i < entry->relocation_count; i++) {
1463		u64 offset = eb_relocate_entry(eb, vma, &relocs[i]);
 
 
 
 
 
 
1464
1465		if ((s64)offset < 0) {
1466			err = (int)offset;
1467			goto err;
1468		}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1469	}
1470	err = 0;
1471err:
1472	reloc_cache_reset(&eb->reloc_cache);
1473	return err;
1474}
1475
1476static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
1477{
1478	const char __user *addr, *end;
1479	unsigned long size;
1480	char __maybe_unused c;
1481
1482	size = entry->relocation_count;
1483	if (size == 0)
1484		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1485
1486	if (size > N_RELOC(ULONG_MAX))
1487		return -EINVAL;
 
1488
1489	addr = u64_to_user_ptr(entry->relocs_ptr);
1490	size *= sizeof(struct drm_i915_gem_relocation_entry);
1491	if (!access_ok(VERIFY_READ, addr, size))
1492		return -EFAULT;
1493
1494	end = addr + size;
1495	for (; addr < end; addr += PAGE_SIZE) {
1496		int err = __get_user(c, addr);
1497		if (err)
1498			return err;
1499	}
1500	return __get_user(c, end - 1);
 
1501}
1502
1503static int eb_copy_relocations(const struct i915_execbuffer *eb)
 
1504{
1505	const unsigned int count = eb->buffer_count;
1506	unsigned int i;
1507	int err;
1508
1509	for (i = 0; i < count; i++) {
1510		const unsigned int nreloc = eb->exec[i].relocation_count;
1511		struct drm_i915_gem_relocation_entry __user *urelocs;
1512		struct drm_i915_gem_relocation_entry *relocs;
1513		unsigned long size;
1514		unsigned long copied;
1515
1516		if (nreloc == 0)
1517			continue;
1518
1519		err = check_relocations(&eb->exec[i]);
1520		if (err)
1521			goto err;
1522
1523		urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
1524		size = nreloc * sizeof(*relocs);
1525
1526		relocs = kvmalloc_array(size, 1, GFP_KERNEL);
1527		if (!relocs) {
1528			kvfree(relocs);
1529			err = -ENOMEM;
1530			goto err;
1531		}
 
 
1532
1533		/* copy_from_user is limited to < 4GiB */
1534		copied = 0;
1535		do {
1536			unsigned int len =
1537				min_t(u64, BIT_ULL(31), size - copied);
1538
1539			if (__copy_from_user((char *)relocs + copied,
1540					     (char __user *)urelocs + copied,
1541					     len)) {
1542				kvfree(relocs);
1543				err = -EFAULT;
1544				goto err;
1545			}
1546
1547			copied += len;
1548		} while (copied < size);
 
1549
1550		/*
1551		 * As we do not update the known relocation offsets after
1552		 * relocating (due to the complexities in lock handling),
1553		 * we need to mark them as invalid now so that we force the
1554		 * relocation processing next time. Just in case the target
1555		 * object is evicted and then rebound into its old
1556		 * presumed_offset before the next execbuffer - if that
1557		 * happened we would make the mistake of assuming that the
1558		 * relocations were valid.
1559		 */
1560		user_access_begin();
1561		for (copied = 0; copied < nreloc; copied++)
1562			unsafe_put_user(-1,
1563					&urelocs[copied].presumed_offset,
1564					end_user);
1565end_user:
1566		user_access_end();
1567
1568		eb->exec[i].relocs_ptr = (uintptr_t)relocs;
1569	}
 
1570
1571	return 0;
 
 
1572
1573err:
1574	while (i--) {
1575		struct drm_i915_gem_relocation_entry *relocs =
1576			u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
1577		if (eb->exec[i].relocation_count)
1578			kvfree(relocs);
1579	}
1580	return err;
1581}
1582
1583static int eb_prefault_relocations(const struct i915_execbuffer *eb)
 
 
 
 
1584{
1585	const unsigned int count = eb->buffer_count;
1586	unsigned int i;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1587
1588	if (unlikely(i915_modparams.prefault_disable))
1589		return 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1590
1591	for (i = 0; i < count; i++) {
1592		int err;
 
 
1593
1594		err = check_relocations(&eb->exec[i]);
1595		if (err)
1596			return err;
1597	}
1598
1599	return 0;
 
 
 
 
 
 
 
1600}
1601
1602static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 
 
 
 
 
 
 
1603{
1604	struct drm_device *dev = &eb->i915->drm;
1605	bool have_copy = false;
1606	struct i915_vma *vma;
1607	int err = 0;
 
 
 
1608
1609repeat:
1610	if (signal_pending(current)) {
1611		err = -ERESTARTSYS;
1612		goto out;
 
 
 
 
1613	}
1614
1615	/* We may process another execbuffer during the unlock... */
1616	eb_reset_vmas(eb);
1617	mutex_unlock(&dev->struct_mutex);
1618
1619	/*
1620	 * We take 3 passes through the slowpatch.
1621	 *
1622	 * 1 - we try to just prefault all the user relocation entries and
1623	 * then attempt to reuse the atomic pagefault disabled fast path again.
1624	 *
1625	 * 2 - we copy the user entries to a local buffer here outside of the
1626	 * local and allow ourselves to wait upon any rendering before
1627	 * relocations
1628	 *
1629	 * 3 - we already have a local copy of the relocation entries, but
1630	 * were interrupted (EAGAIN) whilst waiting for the objects, try again.
1631	 */
1632	if (!err) {
1633		err = eb_prefault_relocations(eb);
1634	} else if (!have_copy) {
1635		err = eb_copy_relocations(eb);
1636		have_copy = err == 0;
1637	} else {
1638		cond_resched();
1639		err = 0;
1640	}
1641	if (err) {
1642		mutex_lock(&dev->struct_mutex);
1643		goto out;
1644	}
1645
1646	/* A frequent cause for EAGAIN are currently unavailable client pages */
1647	flush_workqueue(eb->i915->mm.userptr_wq);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1648
1649	err = i915_mutex_lock_interruptible(dev);
1650	if (err) {
1651		mutex_lock(&dev->struct_mutex);
1652		goto out;
1653	}
1654
1655	/* reacquire the objects */
1656	err = eb_lookup_vmas(eb);
1657	if (err)
 
1658		goto err;
1659
1660	GEM_BUG_ON(!eb->batch);
 
 
 
1661
1662	list_for_each_entry(vma, &eb->relocs, reloc_link) {
1663		if (!have_copy) {
1664			pagefault_disable();
1665			err = eb_relocate_vma(eb, vma);
1666			pagefault_enable();
1667			if (err)
1668				goto repeat;
1669		} else {
1670			err = eb_relocate_vma_slow(eb, vma);
1671			if (err)
1672				goto err;
1673		}
1674	}
1675
1676	/*
1677	 * Leave the user relocations as are, this is the painfully slow path,
1678	 * and we want to avoid the complication of dropping the lock whilst
1679	 * having buffers reserved in the aperture and so causing spurious
1680	 * ENOSPC for random operations.
1681	 */
1682
1683err:
1684	if (err == -EAGAIN)
1685		goto repeat;
 
 
1686
1687out:
1688	if (have_copy) {
1689		const unsigned int count = eb->buffer_count;
1690		unsigned int i;
1691
1692		for (i = 0; i < count; i++) {
1693			const struct drm_i915_gem_exec_object2 *entry =
1694				&eb->exec[i];
1695			struct drm_i915_gem_relocation_entry *relocs;
1696
1697			if (!entry->relocation_count)
1698				continue;
1699
1700			relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
1701			kvfree(relocs);
 
 
1702		}
 
 
 
 
 
1703	}
1704
1705	return err;
 
 
 
 
 
 
 
 
 
1706}
1707
1708static int eb_relocate(struct i915_execbuffer *eb)
 
1709{
1710	if (eb_lookup_vmas(eb))
1711		goto slow;
1712
1713	/* The objects are in their final locations, apply the relocations. */
1714	if (eb->args->flags & __EXEC_HAS_RELOC) {
1715		struct i915_vma *vma;
1716
1717		list_for_each_entry(vma, &eb->relocs, reloc_link) {
1718			if (eb_relocate_vma(eb, vma))
1719				goto slow;
1720		}
1721	}
 
 
1722
1723	return 0;
 
1724
1725slow:
1726	return eb_relocate_slow(eb);
1727}
1728
1729static void eb_export_fence(struct i915_vma *vma,
1730			    struct i915_request *rq,
1731			    unsigned int flags)
1732{
1733	struct reservation_object *resv = vma->resv;
1734
1735	/*
1736	 * Ignore errors from failing to allocate the new fence, we can't
1737	 * handle an error right now. Worst case should be missed
1738	 * synchronisation leading to rendering corruption.
1739	 */
1740	reservation_object_lock(resv, NULL);
1741	if (flags & EXEC_OBJECT_WRITE)
1742		reservation_object_add_excl_fence(resv, &rq->fence);
1743	else if (reservation_object_reserve_shared(resv) == 0)
1744		reservation_object_add_shared_fence(resv, &rq->fence);
1745	reservation_object_unlock(resv);
1746}
1747
1748static int eb_move_to_gpu(struct i915_execbuffer *eb)
1749{
1750	const unsigned int count = eb->buffer_count;
1751	unsigned int i;
1752	int err;
1753
1754	for (i = 0; i < count; i++) {
1755		unsigned int flags = eb->flags[i];
1756		struct i915_vma *vma = eb->vma[i];
1757		struct drm_i915_gem_object *obj = vma->obj;
1758
1759		if (flags & EXEC_OBJECT_CAPTURE) {
1760			struct i915_capture_list *capture;
1761
1762			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
1763			if (unlikely(!capture))
1764				return -ENOMEM;
1765
1766			capture->next = eb->request->capture_list;
1767			capture->vma = eb->vma[i];
1768			eb->request->capture_list = capture;
1769		}
1770
1771		/*
1772		 * If the GPU is not _reading_ through the CPU cache, we need
1773		 * to make sure that any writes (both previous GPU writes from
1774		 * before a change in snooping levels and normal CPU writes)
1775		 * caught in that cache are flushed to main memory.
1776		 *
1777		 * We want to say
1778		 *   obj->cache_dirty &&
1779		 *   !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
1780		 * but gcc's optimiser doesn't handle that as well and emits
1781		 * two jumps instead of one. Maybe one day...
1782		 */
1783		if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
1784			if (i915_gem_clflush_object(obj, 0))
1785				flags &= ~EXEC_OBJECT_ASYNC;
 
 
 
 
 
 
 
1786		}
1787
1788		if (flags & EXEC_OBJECT_ASYNC)
1789			continue;
1790
1791		err = i915_request_await_object
1792			(eb->request, obj, flags & EXEC_OBJECT_WRITE);
1793		if (err)
1794			return err;
1795	}
1796
1797	for (i = 0; i < count; i++) {
1798		unsigned int flags = eb->flags[i];
1799		struct i915_vma *vma = eb->vma[i];
 
 
 
 
1800
1801		i915_vma_move_to_active(vma, eb->request, flags);
1802		eb_export_fence(vma, eb->request, flags);
 
 
 
 
 
 
 
1803
1804		__eb_unreserve_vma(vma, flags);
1805		vma->exec_flags = NULL;
1806
1807		if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
1808			i915_vma_put(vma);
1809	}
1810	eb->exec = NULL;
1811
1812	/* Unconditionally flush any chipset caches (for streaming writes). */
1813	i915_gem_chipset_flush(eb->i915);
1814
1815	return 0;
1816}
1817
1818static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 
 
1819{
1820	if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
1821		return false;
 
 
 
 
 
 
 
1822
1823	/* Kernel clipping was a DRI1 misfeature */
1824	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
1825		if (exec->num_cliprects || exec->cliprects_ptr)
1826			return false;
1827	}
1828
1829	if (exec->DR4 == 0xffffffff) {
1830		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1831		exec->DR4 = 0;
 
 
 
1832	}
1833	if (exec->DR1 || exec->DR4)
1834		return false;
1835
1836	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
1837		return false;
1838
1839	return true;
1840}
1841
1842void i915_vma_move_to_active(struct i915_vma *vma,
1843			     struct i915_request *rq,
1844			     unsigned int flags)
1845{
1846	struct drm_i915_gem_object *obj = vma->obj;
1847	const unsigned int idx = rq->engine->id;
1848
1849	lockdep_assert_held(&rq->i915->drm.struct_mutex);
1850	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
 
 
1851
1852	/*
1853	 * Add a reference if we're newly entering the active list.
1854	 * The order in which we add operations to the retirement queue is
1855	 * vital here: mark_active adds to the start of the callback list,
1856	 * such that subsequent callbacks are called first. Therefore we
1857	 * add the active reference first and queue for it to be dropped
1858	 * *last*.
1859	 */
1860	if (!i915_vma_is_active(vma))
1861		obj->active_count++;
1862	i915_vma_set_active(vma, idx);
1863	i915_gem_active_set(&vma->last_read[idx], rq);
1864	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
 
 
 
 
 
 
 
 
 
1865
1866	obj->write_domain = 0;
1867	if (flags & EXEC_OBJECT_WRITE) {
1868		obj->write_domain = I915_GEM_DOMAIN_RENDER;
1869
1870		if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
1871			i915_gem_active_set(&obj->frontbuffer_write, rq);
 
 
 
1872
1873		obj->read_domains = 0;
1874	}
1875	obj->read_domains |= I915_GEM_GPU_DOMAINS;
1876
1877	if (flags & EXEC_OBJECT_NEEDS_FENCE)
1878		i915_gem_active_set(&vma->last_fence, rq);
1879}
1880
1881static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
 
 
1882{
1883	u32 *cs;
1884	int i;
 
1885
1886	if (!IS_GEN7(rq->i915) || rq->engine->id != RCS) {
1887		DRM_DEBUG("sol reset is gen7/rcs only\n");
1888		return -EINVAL;
1889	}
1890
1891	cs = intel_ring_begin(rq, 4 * 2 + 2);
1892	if (IS_ERR(cs))
1893		return PTR_ERR(cs);
1894
1895	*cs++ = MI_LOAD_REGISTER_IMM(4);
1896	for (i = 0; i < 4; i++) {
1897		*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1898		*cs++ = 0;
 
1899	}
1900	*cs++ = MI_NOOP;
1901	intel_ring_advance(rq, cs);
1902
1903	return 0;
1904}
1905
1906static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
 
 
 
 
 
 
 
1907{
1908	struct drm_i915_gem_object *shadow_batch_obj;
1909	struct i915_vma *vma;
1910	int err;
1911
1912	shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool,
1913						   PAGE_ALIGN(eb->batch_len));
1914	if (IS_ERR(shadow_batch_obj))
1915		return ERR_CAST(shadow_batch_obj);
 
 
 
 
 
 
 
 
 
 
 
 
 
1916
1917	err = intel_engine_cmd_parser(eb->engine,
1918				      eb->batch->obj,
1919				      shadow_batch_obj,
1920				      eb->batch_start_offset,
1921				      eb->batch_len,
1922				      is_master);
1923	if (err) {
1924		if (err == -EACCES) /* unhandled chained batch */
1925			vma = NULL;
1926		else
1927			vma = ERR_PTR(err);
1928		goto out;
1929	}
1930
1931	vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0);
1932	if (IS_ERR(vma))
1933		goto out;
1934
1935	eb->vma[eb->buffer_count] = i915_vma_get(vma);
1936	eb->flags[eb->buffer_count] =
1937		__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
1938	vma->exec_flags = &eb->flags[eb->buffer_count];
1939	eb->buffer_count++;
1940
1941out:
1942	i915_gem_object_unpin_pages(shadow_batch_obj);
1943	return vma;
 
 
 
1944}
1945
1946static void
1947add_to_client(struct i915_request *rq, struct drm_file *file)
1948{
1949	rq->file_priv = file->driver_priv;
1950	list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
1951}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1952
1953static int eb_submit(struct i915_execbuffer *eb)
1954{
1955	int err;
 
 
1956
1957	err = eb_move_to_gpu(eb);
1958	if (err)
1959		return err;
1960
1961	if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
1962		err = i915_reset_gen7_sol_offsets(eb->request);
1963		if (err)
1964			return err;
1965	}
1966
1967	err = eb->engine->emit_bb_start(eb->request,
1968					eb->batch->node.start +
1969					eb->batch_start_offset,
1970					eb->batch_len,
1971					eb->batch_flags);
1972	if (err)
1973		return err;
 
 
 
 
 
 
 
 
 
 
1974
1975	return 0;
1976}
1977
1978/*
1979 * Find one BSD ring to dispatch the corresponding BSD command.
1980 * The engine index is returned.
1981 */
1982static unsigned int
1983gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
1984			 struct drm_file *file)
1985{
1986	struct drm_i915_file_private *file_priv = file->driver_priv;
1987
1988	/* Check whether the file_priv has already selected one ring. */
1989	if ((int)file_priv->bsd_engine < 0)
1990		file_priv->bsd_engine = atomic_fetch_xor(1,
1991			 &dev_priv->mm.bsd_engine_dispatch_index);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1992
1993	return file_priv->bsd_engine;
1994}
1995
1996#define I915_USER_RINGS (4)
1997
1998static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1999	[I915_EXEC_DEFAULT]	= RCS,
2000	[I915_EXEC_RENDER]	= RCS,
2001	[I915_EXEC_BLT]		= BCS,
2002	[I915_EXEC_BSD]		= VCS,
2003	[I915_EXEC_VEBOX]	= VECS
2004};
2005
2006static struct intel_engine_cs *
2007eb_select_engine(struct drm_i915_private *dev_priv,
2008		 struct drm_file *file,
2009		 struct drm_i915_gem_execbuffer2 *args)
 
2010{
2011	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
2012	struct intel_engine_cs *engine;
2013
2014	if (user_ring_id > I915_USER_RINGS) {
2015		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
2016		return NULL;
2017	}
2018
2019	if ((user_ring_id != I915_EXEC_BSD) &&
2020	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
2021		DRM_DEBUG("execbuf with non bsd ring but with invalid "
2022			  "bsd dispatch flags: %d\n", (int)(args->flags));
2023		return NULL;
2024	}
2025
2026	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
2027		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
2028
2029		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
2030			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
2031		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
2032			   bsd_idx <= I915_EXEC_BSD_RING2) {
2033			bsd_idx >>= I915_EXEC_BSD_SHIFT;
2034			bsd_idx--;
2035		} else {
2036			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
2037				  bsd_idx);
2038			return NULL;
2039		}
2040
2041		engine = dev_priv->engine[_VCS(bsd_idx)];
2042	} else {
2043		engine = dev_priv->engine[user_ring_map[user_ring_id]];
2044	}
2045
2046	if (!engine) {
2047		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
2048		return NULL;
2049	}
2050
2051	return engine;
2052}
2053
/*
 * Drop the references held by the first @n entries of @fences (walking
 * from the last one down) and free the array itself. The low 2 bits of
 * each entry are masked off before the pointer is used.
 */
static void
__free_fence_array(struct drm_syncobj **fences, unsigned int n)
{
	for (; n; n--)
		drm_syncobj_put(ptr_mask_bits(fences[n - 1], 2));

	kvfree(fences);
}
2061
2062static struct drm_syncobj **
2063get_fence_array(struct drm_i915_gem_execbuffer2 *args,
2064		struct drm_file *file)
2065{
2066	const unsigned long nfences = args->num_cliprects;
2067	struct drm_i915_gem_exec_fence __user *user;
2068	struct drm_syncobj **fences;
2069	unsigned long n;
2070	int err;
2071
2072	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
2073		return NULL;
2074
2075	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
2076	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
2077	if (nfences > min_t(unsigned long,
2078			    ULONG_MAX / sizeof(*user),
2079			    SIZE_MAX / sizeof(*fences)))
2080		return ERR_PTR(-EINVAL);
2081
2082	user = u64_to_user_ptr(args->cliprects_ptr);
2083	if (!access_ok(VERIFY_READ, user, nfences * sizeof(*user)))
2084		return ERR_PTR(-EFAULT);
2085
2086	fences = kvmalloc_array(nfences, sizeof(*fences),
2087				__GFP_NOWARN | GFP_KERNEL);
2088	if (!fences)
2089		return ERR_PTR(-ENOMEM);
2090
2091	for (n = 0; n < nfences; n++) {
2092		struct drm_i915_gem_exec_fence fence;
2093		struct drm_syncobj *syncobj;
2094
2095		if (__copy_from_user(&fence, user++, sizeof(fence))) {
2096			err = -EFAULT;
2097			goto err;
2098		}
2099
2100		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
2101			err = -EINVAL;
2102			goto err;
2103		}
2104
2105		syncobj = drm_syncobj_find(file, fence.handle);
2106		if (!syncobj) {
2107			DRM_DEBUG("Invalid syncobj handle provided\n");
2108			err = -ENOENT;
2109			goto err;
2110		}
2111
2112		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
2113			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
2114
2115		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
2116	}
2117
2118	return fences;
2119
2120err:
2121	__free_fence_array(fences, n);
2122	return ERR_PTR(err);
2123}
2124
2125static void
2126put_fence_array(struct drm_i915_gem_execbuffer2 *args,
2127		struct drm_syncobj **fences)
2128{
2129	if (fences)
2130		__free_fence_array(fences, args->num_cliprects);
2131}
2132
2133static int
2134await_fence_array(struct i915_execbuffer *eb,
2135		  struct drm_syncobj **fences)
2136{
2137	const unsigned int nfences = eb->args->num_cliprects;
2138	unsigned int n;
2139	int err;
2140
2141	for (n = 0; n < nfences; n++) {
2142		struct drm_syncobj *syncobj;
2143		struct dma_fence *fence;
2144		unsigned int flags;
2145
2146		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2147		if (!(flags & I915_EXEC_FENCE_WAIT))
2148			continue;
2149
2150		fence = drm_syncobj_fence_get(syncobj);
2151		if (!fence)
2152			return -EINVAL;
2153
2154		err = i915_request_await_dma_fence(eb->request, fence);
2155		dma_fence_put(fence);
2156		if (err < 0)
2157			return err;
2158	}
2159
2160	return 0;
2161}
2162
2163static void
2164signal_fence_array(struct i915_execbuffer *eb,
2165		   struct drm_syncobj **fences)
2166{
2167	const unsigned int nfences = eb->args->num_cliprects;
2168	struct dma_fence * const fence = &eb->request->fence;
2169	unsigned int n;
2170
2171	for (n = 0; n < nfences; n++) {
2172		struct drm_syncobj *syncobj;
2173		unsigned int flags;
2174
2175		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
2176		if (!(flags & I915_EXEC_FENCE_SIGNAL))
2177			continue;
2178
2179		drm_syncobj_replace_fence(syncobj, fence);
2180	}
2181}
2182
/*
 * i915_gem_do_execbuffer - common worker behind both execbuffer ioctls
 * @dev: DRM device the ioctl was issued on
 * @file: DRM file of the caller
 * @args: execbuffer2 arguments. args->flags gains __EXEC_HAS_RELOC unless
 *        I915_EXEC_NO_RELOC was passed (and DBG_FORCE_RELOC is off); the bit
 *        is cleared again if eb_relocate() fails. With I915_EXEC_FENCE_OUT
 *        and a successful submit, the upper 32 bits of args->rsvd2 receive
 *        the new out-fence fd.
 * @exec: exec object array. The same allocation must also provide room
 *        behind its args->buffer_count + 1 entries for the vma pointer
 *        array and the flags array that are carved out of it below.
 * @fences: syncobj array from get_fence_array(), or NULL
 *
 * Returns the result of eb_submit() once a request has been allocated and
 * all waits were set up, otherwise the error of whichever earlier step
 * failed.
 */
static int
i915_gem_do_execbuffer(struct drm_device *dev,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec,
		       struct drm_syncobj **fences)
{
	struct i915_execbuffer eb;
	struct dma_fence *in_fence = NULL;
	struct sync_file *out_fence = NULL;
	int out_fence_fd = -1;
	int err;

	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
		     ~__EXEC_OBJECT_UNKNOWN_FLAGS);

	eb.i915 = to_i915(dev);
	eb.file = file;
	eb.args = args;
	if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
		args->flags |= __EXEC_HAS_RELOC;

	/*
	 * The vma pointers and the per-object flags share the allocation of
	 * @exec: vma[] starts right behind the buffer_count + 1 exec entries
	 * and flags[] right behind the buffer_count + 1 vma pointers.
	 */
	eb.exec = exec;
	eb.vma = (struct i915_vma **)(exec + args->buffer_count + 1);
	eb.vma[0] = NULL;
	eb.flags = (unsigned int *)(eb.vma + args->buffer_count + 1);

	eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	if (USES_FULL_PPGTT(eb.i915))
		eb.invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
	reloc_cache_init(&eb.reloc_cache, eb.i915);

	eb.buffer_count = args->buffer_count;
	eb.batch_start_offset = args->batch_start_offset;
	eb.batch_len = args->batch_len;

	eb.batch_flags = 0;
	/* Secure dispatch needs both DRM master and CAP_SYS_ADMIN. */
	if (args->flags & I915_EXEC_SECURE) {
		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
		    return -EPERM;

		eb.batch_flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		eb.batch_flags |= I915_DISPATCH_PINNED;

	eb.engine = eb_select_engine(eb.i915, file, args);
	if (!eb.engine)
		return -EINVAL;

	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
		if (!HAS_RESOURCE_STREAMER(eb.i915)) {
			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
			return -EINVAL;
		}
		if (eb.engine->id != RCS) {
			DRM_DEBUG("RS is not available on %s\n",
				 eb.engine->name);
			return -EINVAL;
		}

		eb.batch_flags |= I915_DISPATCH_RS;
	}

	/* The in-fence fd is passed in the lower 32 bits of rsvd2. */
	if (args->flags & I915_EXEC_FENCE_IN) {
		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
		if (!in_fence)
			return -EINVAL;
	}

	/*
	 * Only reserve the fd number here; the sync_file is created once the
	 * request exists and is installed only if submission succeeds.
	 */
	if (args->flags & I915_EXEC_FENCE_OUT) {
		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
		if (out_fence_fd < 0) {
			err = out_fence_fd;
			goto err_in_fence;
		}
	}

	err = eb_create(&eb);
	if (err)
		goto err_out_fence;

	GEM_BUG_ON(!eb.lut_size);

	err = eb_select_context(&eb);
	if (unlikely(err))
		goto err_destroy;

	/*
	 * Take a local wakeref for preparing to dispatch the execbuf as
	 * we expect to access the hardware fairly frequently in the
	 * process. Upon first dispatch, we acquire another prolonged
	 * wakeref that we hold until the GPU has been idle for at least
	 * 100ms.
	 */
	intel_runtime_pm_get(eb.i915);

	err = i915_mutex_lock_interruptible(dev);
	if (err)
		goto err_rpm;

	err = eb_relocate(&eb);
	if (err) {
		/*
		 * If the user expects the execobject.offset and
		 * reloc.presumed_offset to be an exact match,
		 * as for using NO_RELOC, then we cannot update
		 * the execobject.offset until we have completed
		 * relocation.
		 */
		args->flags &= ~__EXEC_HAS_RELOC;
		goto err_vma;
	}

	if (unlikely(*eb.batch->exec_flags & EXEC_OBJECT_WRITE)) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		err = -EINVAL;
		goto err_vma;
	}
	/* Written so that the bounds check itself cannot wrap around. */
	if (eb.batch_start_offset > eb.batch->size ||
	    eb.batch_len > eb.batch->size - eb.batch_start_offset) {
		DRM_DEBUG("Attempting to use out-of-bounds batch\n");
		err = -EINVAL;
		goto err_vma;
	}

	if (eb_use_cmdparser(&eb)) {
		struct i915_vma *vma;

		vma = eb_parse(&eb, drm_is_current_master(file));
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_vma;
		}

		if (vma) {
			/*
			 * Batch parsed and accepted:
			 *
			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
			 * bit from MI_BATCH_BUFFER_START commands issued in
			 * the dispatch_execbuffer implementations. We
			 * specifically don't want that set on batches the
			 * command parser has accepted.
			 */
			eb.batch_flags |= I915_DISPATCH_SECURE;
			eb.batch_start_offset = 0;
			eb.batch = vma;
		}
	}

	/* A zero batch_len means "up to the end of the batch object". */
	if (eb.batch_len == 0)
		eb.batch_len = eb.batch->size - eb.batch_start_offset;

	/*
	 * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but bdw mucks it up again. */
	if (eb.batch_flags & I915_DISPATCH_SECURE) {
		struct i915_vma *vma;

		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
		 *   so we don't really have issues with multiple objects not
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		vma = i915_gem_object_ggtt_pin(eb.batch->obj, NULL, 0, 0, 0);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_vma;
		}

		eb.batch = vma;
	}

	/* All GPU relocation batches must be submitted prior to the user rq */
	GEM_BUG_ON(eb.reloc_cache.rq);

	/* Allocate a request for this batch buffer nice and early. */
	eb.request = i915_request_alloc(eb.engine, eb.ctx);
	if (IS_ERR(eb.request)) {
		err = PTR_ERR(eb.request);
		goto err_batch_unpin;
	}

	if (in_fence) {
		err = i915_request_await_dma_fence(eb.request, in_fence);
		if (err < 0)
			goto err_request;
	}

	if (fences) {
		err = await_fence_array(&eb, fences);
		if (err)
			goto err_request;
	}

	if (out_fence_fd != -1) {
		out_fence = sync_file_create(&eb.request->fence);
		if (!out_fence) {
			err = -ENOMEM;
			goto err_request;
		}
	}

	/*
	 * Whilst this request exists, batch_obj will be on the
	 * active_list, and so will hold the active reference. Only when this
	 * request is retired will the batch_obj be moved onto the
	 * inactive_list and lose its active reference. Hence we do not need
	 * to explicitly hold another reference here.
	 */
	eb.request->batch = eb.batch;

	trace_i915_request_queue(eb.request, eb.batch_flags);
	err = eb_submit(&eb);
	/*
	 * From here on the code runs for success and failure alike: once a
	 * request has been allocated it is always added (with err == 0 passed
	 * as the second argument) and any SIGNAL syncobjs receive its fence.
	 */
err_request:
	__i915_request_add(eb.request, err == 0);
	add_to_client(eb.request, file);

	if (fences)
		signal_fence_array(&eb, fences);

	if (out_fence) {
		if (err == 0) {
			fd_install(out_fence_fd, out_fence->file);
			args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
			args->rsvd2 |= (u64)out_fence_fd << 32;
			/* Installed: must not be released by err_out_fence. */
			out_fence_fd = -1;
		} else {
			fput(out_fence->file);
		}
	}

	/* Unwind in reverse order of acquisition. */
err_batch_unpin:
	if (eb.batch_flags & I915_DISPATCH_SECURE)
		i915_vma_unpin(eb.batch);
err_vma:
	if (eb.exec)
		eb_release_vmas(&eb);
	mutex_unlock(&dev->struct_mutex);
err_rpm:
	intel_runtime_pm_put(eb.i915);
	i915_gem_context_put(eb.ctx);
err_destroy:
	eb_destroy(&eb);
err_out_fence:
	if (out_fence_fd != -1)
		put_unused_fd(out_fence_fd);
err_in_fence:
	dma_fence_put(in_fence);
	return err;
}
2442
2443static size_t eb_element_size(void)
2444{
2445	return (sizeof(struct drm_i915_gem_exec_object2) +
2446		sizeof(struct i915_vma *) +
2447		sizeof(unsigned int));
2448}
2449
2450static bool check_buffer_count(size_t count)
2451{
2452	const size_t sz = eb_element_size();
2453
2454	/*
2455	 * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
2456	 * array size (see eb_create()). Otherwise, we can accept an array as
2457	 * large as can be addressed (though use large arrays at your peril)!
2458	 */
 
 
 
 
2459
2460	return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
 
 
 
 
2461}
2462
2463/*
2464 * Legacy execbuffer just creates an exec2 list from the original exec object
2465 * list array and passes it to the real function.
2466 */
2467int
2468i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
2469			  struct drm_file *file)
2470{
2471	struct drm_i915_gem_execbuffer *args = data;
2472	struct drm_i915_gem_execbuffer2 exec2;
2473	struct drm_i915_gem_exec_object *exec_list = NULL;
2474	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
2475	const size_t count = args->buffer_count;
2476	unsigned int i;
2477	int err;
2478
2479	if (!check_buffer_count(count)) {
2480		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
2481		return -EINVAL;
2482	}
2483
2484	exec2.buffers_ptr = args->buffers_ptr;
2485	exec2.buffer_count = args->buffer_count;
2486	exec2.batch_start_offset = args->batch_start_offset;
2487	exec2.batch_len = args->batch_len;
2488	exec2.DR1 = args->DR1;
2489	exec2.DR4 = args->DR4;
2490	exec2.num_cliprects = args->num_cliprects;
2491	exec2.cliprects_ptr = args->cliprects_ptr;
2492	exec2.flags = I915_EXEC_RENDER;
2493	i915_execbuffer2_set_context_id(exec2, 0);
2494
2495	if (!i915_gem_check_execbuffer(&exec2))
2496		return -EINVAL;
2497
2498	/* Copy in the exec list from userland */
2499	exec_list = kvmalloc_array(count, sizeof(*exec_list),
2500				   __GFP_NOWARN | GFP_KERNEL);
2501	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2502				    __GFP_NOWARN | GFP_KERNEL);
2503	if (exec_list == NULL || exec2_list == NULL) {
2504		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
2505			  args->buffer_count);
2506		kvfree(exec_list);
2507		kvfree(exec2_list);
2508		return -ENOMEM;
2509	}
2510	err = copy_from_user(exec_list,
2511			     u64_to_user_ptr(args->buffers_ptr),
2512			     sizeof(*exec_list) * count);
2513	if (err) {
2514		DRM_DEBUG("copy %d exec entries failed %d\n",
2515			  args->buffer_count, err);
2516		kvfree(exec_list);
2517		kvfree(exec2_list);
2518		return -EFAULT;
2519	}
2520
2521	for (i = 0; i < args->buffer_count; i++) {
2522		exec2_list[i].handle = exec_list[i].handle;
2523		exec2_list[i].relocation_count = exec_list[i].relocation_count;
2524		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
2525		exec2_list[i].alignment = exec_list[i].alignment;
2526		exec2_list[i].offset = exec_list[i].offset;
2527		if (INTEL_GEN(to_i915(dev)) < 4)
2528			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
2529		else
2530			exec2_list[i].flags = 0;
2531	}
2532
2533	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
2534	if (exec2.flags & __EXEC_HAS_RELOC) {
 
 
 
 
 
 
 
 
 
 
 
2535		struct drm_i915_gem_exec_object __user *user_exec_list =
2536			u64_to_user_ptr(args->buffers_ptr);
2537
2538		/* Copy the new buffer offsets back to the user's exec list. */
2539		for (i = 0; i < args->buffer_count; i++) {
2540			if (!(exec2_list[i].offset & UPDATE))
2541				continue;
2542
2543			exec2_list[i].offset =
2544				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2545			exec2_list[i].offset &= PIN_OFFSET_MASK;
2546			if (__copy_to_user(&user_exec_list[i].offset,
2547					   &exec2_list[i].offset,
2548					   sizeof(user_exec_list[i].offset)))
 
 
 
 
2549				break;
 
2550		}
2551	}
2552
2553	kvfree(exec_list);
2554	kvfree(exec2_list);
2555	return err;
2556}
2557
2558int
2559i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
2560			   struct drm_file *file)
2561{
2562	struct drm_i915_gem_execbuffer2 *args = data;
2563	struct drm_i915_gem_exec_object2 *exec2_list;
2564	struct drm_syncobj **fences = NULL;
2565	const size_t count = args->buffer_count;
2566	int err;
2567
2568	if (!check_buffer_count(count)) {
2569		DRM_DEBUG("execbuf2 with %zd buffers\n", count);
 
2570		return -EINVAL;
2571	}
2572
2573	if (!i915_gem_check_execbuffer(args))
 
2574		return -EINVAL;
 
2575
2576	/* Allocate an extra slot for use by the command parser */
2577	exec2_list = kvmalloc_array(count + 1, eb_element_size(),
2578				    __GFP_NOWARN | GFP_KERNEL);
 
 
2579	if (exec2_list == NULL) {
2580		DRM_DEBUG("Failed to allocate exec list for %zd buffers\n",
2581			  count);
2582		return -ENOMEM;
2583	}
2584	if (copy_from_user(exec2_list,
2585			   u64_to_user_ptr(args->buffers_ptr),
2586			   sizeof(*exec2_list) * count)) {
2587		DRM_DEBUG("copy %zd exec entries failed\n", count);
2588		kvfree(exec2_list);
 
 
2589		return -EFAULT;
2590	}
2591
2592	if (args->flags & I915_EXEC_FENCE_ARRAY) {
2593		fences = get_fence_array(args, file);
2594		if (IS_ERR(fences)) {
2595			kvfree(exec2_list);
2596			return PTR_ERR(fences);
2597		}
2598	}
2599
2600	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
2601
2602	/*
2603	 * Now that we have begun execution of the batchbuffer, we ignore
2604	 * any new error after this point. Also given that we have already
2605	 * updated the associated relocations, we try to write out the current
2606	 * object locations irrespective of any error.
2607	 */
2608	if (args->flags & __EXEC_HAS_RELOC) {
2609		struct drm_i915_gem_exec_object2 __user *user_exec_list =
2610			u64_to_user_ptr(args->buffers_ptr);
2611		unsigned int i;
2612
2613		/* Copy the new buffer offsets back to the user's exec list. */
2614		user_access_begin();
2615		for (i = 0; i < args->buffer_count; i++) {
2616			if (!(exec2_list[i].offset & UPDATE))
2617				continue;
2618
2619			exec2_list[i].offset =
2620				gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
2621			unsafe_put_user(exec2_list[i].offset,
2622					&user_exec_list[i].offset,
2623					end_user);
2624		}
2625end_user:
2626		user_access_end();
 
 
 
 
 
2627	}
2628
2629	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
2630	put_fence_array(args, fences);
2631	kvfree(exec2_list);
2632	return err;
2633}