/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

struct tile {
	unsigned int width;
	unsigned int height;
	unsigned int stride;
	unsigned int size;
	unsigned int tiling;
	unsigned int swizzle;
};

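/*
 * Pick out a single address bit and shift it down to bit position 6,
 * ready to be XORed into an offset when emulating bit-6 swizzling.
 */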
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

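/*
 * Translate an offset in the fenced (tiled) GTT view of the object into
 * the linear offset within its backing store where the access actually
 * lands, replicating the X/Y tile layout and bit-6 swizzle that the
 * hardware fence applies.
 */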
static u64 tiled_offset(const struct tile *tile, u64 v)
{
	u64 x, y;

	if (tile->tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, tile->stride, &x);
	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;

	if (tile->tiling == I915_TILING_X) {
		v += y * tile->width;
		v += div64_u64_rem(x, tile->width, &x) << tile->size;
		v += x;
	} else if (tile->width == 128) {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	} else {
		const unsigned int ytile_span = 32;
		const unsigned int ytile_height = 256;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	}

	switch (tile->swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

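/*
 * For a sample of pages within the object, write a unique value through
 * a partial GGTT view covering that page, then read it back via the CPU
 * using a manually detiled offset to confirm the write landed where the
 * chosen tiling, stride and swizzle say it should. Returns -EINTR once
 * the allotted time has expired.
 */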
static int check_partial_mapping(struct drm_i915_gem_object *obj,
				 const struct tile *tile,
				 unsigned long end_time)
{
	const unsigned int nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_vma *vma;
	unsigned long page;
	int err;

	if (igt_timeout(end_time,
			"%s: timed out before tiling=%d stride=%d\n",
			__func__, tile->tiling, tile->stride))
		return -EINTR;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}

	for_each_prime_number_from(page, 1, npages) {
		struct i915_ggtt_view view =
			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
		u32 __iomem *io;
		struct page *p;
		unsigned int n;
		u64 offset;
		u32 *cpu;

		GEM_BUG_ON(view.partial.size > nreal);
		cond_resched();

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
		if (IS_ERR(vma)) {
			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(vma));
			return PTR_ERR(vma);
		}

		n = page - view.partial.offset;
		GEM_BUG_ON(n >= view.partial.size);

		io = i915_vma_pin_iomap(vma);
		i915_vma_unpin(vma);
		if (IS_ERR(io)) {
			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(io));
			return PTR_ERR(io);
		}

		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
		i915_vma_unpin_iomap(vma);

		offset = tiled_offset(tile, page << PAGE_SHIFT);
		if (offset >= obj->base.size)
			continue;

		intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);

		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		cpu = kmap(p) + offset_in_page(offset);
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		if (*cpu != (u32)page) {
			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
			       page, n,
			       view.partial.offset,
			       view.partial.size,
			       vma->size >> PAGE_SHIFT,
			       tile->tiling ? tile_row_pages(obj) : 0,
			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
			       offset >> PAGE_SHIFT,
			       (unsigned int)offset_in_page(offset),
			       offset,
			       (u32)page, *cpu);
			err = -EINVAL;
		}
		*cpu = 0;
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		kunmap(p);
		if (err)
			return err;

		i915_vma_destroy(vma);
	}

	return 0;
}

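/*
 * Walk each tiling mode supported by the platform and, for each, sweep a
 * range of strides (power-of-two pitches, their immediate neighbours and
 * a series of prime pitches); see the comment below for the overall idea.
 */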
static int igt_partial_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	int tiling;
	int err;

	/* We want to check the page mapping and fencing of a large object
	 * mmapped through the GTT. The object we create is larger than can
	 * possibly be mmaped as a whole, and so we must use partial GGTT vma.
	 * We then check that a write through each partial GGTT vma ends up
	 * in the right set of pages within the object, and with the expected
	 * tiling, which we verify by manual swizzling.
	 */

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	if (1) {
		IGT_TIMEOUT(end);
		struct tile tile;

		tile.height = 1;
		tile.width = 1;
		tile.size = 0;
		tile.stride = 0;
		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
		tile.tiling = I915_TILING_NONE;

		err = check_partial_mapping(obj, &tile, end);
		if (err && err != -EINTR)
			goto out_unlock;
	}

	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
		IGT_TIMEOUT(end);
		unsigned int max_pitch;
		unsigned int pitch;
		struct tile tile;

		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
			/*
			 * The swizzling pattern is actually unknown as it
			 * varies based on physical address of each page.
			 * See i915_gem_detect_bit_6_swizzle().
			 */
			break;

		tile.tiling = tiling;
		switch (tiling) {
		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		if (INTEL_GEN(i915) <= 2) {
			tile.height = 16;
			tile.width = 128;
			tile.size = 11;
		} else if (tile.tiling == I915_TILING_Y &&
			   HAS_128_BYTE_Y_TILING(i915)) {
			tile.height = 32;
			tile.width = 128;
			tile.size = 12;
		} else {
			tile.height = 8;
			tile.width = 512;
			tile.size = 12;
		}

		if (INTEL_GEN(i915) < 4)
			max_pitch = 8192 / tile.width;
		else if (INTEL_GEN(i915) < 7)
			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
		else
			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;

		for (pitch = max_pitch; pitch; pitch >>= 1) {
			tile.stride = tile.width * pitch;
			err = check_partial_mapping(obj, &tile, end);
			if (err == -EINTR)
				goto next_tiling;
			if (err)
				goto out_unlock;

			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch - 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}

			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch + 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

		if (INTEL_GEN(i915) >= 4) {
			for_each_prime_number(pitch, max_pitch) {
				tile.stride = tile.width * pitch;
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

next_tiling: ;
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

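/*
 * Mark the object as written by an otherwise empty request on every
 * engine, then drop our reference so it stays alive only via its active
 * reference until those requests retire.
 */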
static int make_obj_busy(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			i915_vma_unpin(vma);
			return PTR_ERR(rq);
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, true);
		if (err == 0)
			err = i915_vma_move_to_active(vma, rq,
						      EXEC_OBJECT_WRITE);
		i915_vma_unlock(vma);

		i915_request_add(rq);
	}

	i915_vma_unpin(vma);
	i915_gem_object_put(obj); /* leave it only alive via its active ref */

	return err;
}

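/*
 * Try to create a mmap offset for an object of the given size and report
 * whether the attempt completed with the expected error code.
 */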
static bool assert_mmap_offset(struct drm_i915_private *i915,
			       unsigned long size,
			       int expected)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = create_mmap_offset(obj);
	i915_gem_object_put(obj);

	return err == expected;
}

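/*
 * Unregister the shrinker and stop the background retire/idle workers,
 * taking a GT wakeref so they stay quiesced, giving the test sole
 * control over when objects are reaped.
 */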
static void disable_retire_worker(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);

	intel_gt_pm_get(&i915->gt);

	cancel_delayed_work_sync(&i915->gem.retire_work);
	flush_work(&i915->gem.idle_work);
}

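/*
 * Undo disable_retire_worker(): release the GT wakeref, flush any work
 * generated by the test and re-register the shrinker.
 */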
static void restore_retire_worker(struct drm_i915_private *i915)
{
	intel_gt_pm_put(&i915->gt);

	mutex_lock(&i915->drm.struct_mutex);
	igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	i915_gem_driver_register__shrinker(i915);
}

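/*
 * Take (and release, below) the drm vma_offset_manager's vm_lock so the
 * test can reserve and remove nodes directly in its address-space mm.
 */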
static void mmap_offset_lock(struct drm_i915_private *i915)
	__acquires(&i915->drm.vma_offset_manager->vm_lock)
{
	write_lock(&i915->drm.vma_offset_manager->vm_lock);
}

static void mmap_offset_unlock(struct drm_i915_private *i915)
	__releases(&i915->drm.vma_offset_manager->vm_lock)
{
	write_unlock(&i915->drm.vma_offset_manager->vm_lock);
}

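/*
 * Trim the mmap offset space down to a single page-sized hole, verify
 * that offset creation succeeds or fails with -ENOSPC exactly as the
 * remaining space dictates, and finish by creating busy, immediately
 * closed objects for the reaper to clean up.
 */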
static int igt_mmap_offset_exhaustion(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
	struct drm_i915_gem_object *obj;
	struct drm_mm_node resv, *hole;
	u64 hole_start, hole_end;
	int loop, err;

	/* Disable background reaper */
	disable_retire_worker(i915);
	GEM_BUG_ON(!i915->gt.awake);

	/* Trim the device mmap space to only a page */
	memset(&resv, 0, sizeof(resv));
	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
		resv.start = hole_start;
		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
		mmap_offset_lock(i915);
		err = drm_mm_reserve_node(mm, &resv);
		mmap_offset_unlock(i915);
		if (err) {
			pr_err("Failed to trim VMA manager, err=%d\n", err);
			goto out_park;
		}
		break;
	}

	/* Just fits! */
	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
		pr_err("Unable to insert object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Too large */
	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Fill the hole, further allocation attempts should then fail */
	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out;
	}

	err = create_mmap_offset(obj);
	if (err) {
		pr_err("Unable to insert object into reclaimed hole\n");
		goto err_obj;
	}

	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
		err = -EINVAL;
		goto err_obj;
	}

	i915_gem_object_put(obj);

	/* Now fill with busy dead objects that we expect to reap */
	for (loop = 0; loop < 3; loop++) {
		if (intel_gt_is_wedged(&i915->gt))
			break;

		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		mutex_lock(&i915->drm.struct_mutex);
		err = make_obj_busy(obj);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err) {
			pr_err("[loop %d] Failed to busy the object\n", loop);
			goto err_obj;
		}
	}

out:
	mmap_offset_lock(i915);
	drm_mm_remove_node(&resv);
	mmap_offset_unlock(i915);
out_park:
	restore_retire_worker(i915);
	return err;
err_obj:
	i915_gem_object_put(obj);
	goto out;
}

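/*
 * Live selftest entry point, invoked by the i915 selftest framework on
 * real hardware.
 */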
int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_partial_tiling),
		SUBTEST(igt_mmap_offset_exhaustion),
	};

	return i915_subtests(tests, i915);
}