/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/kref.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"

#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"

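/*
 * Register a bare OA config under a fixed UUID so that the selftests can
 * open a perf stream without a real metrics set being loaded by userspace.
 */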
static int
alloc_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config;

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config)
		return -ENOMEM;

	oa_config->perf = perf;
	kref_init(&oa_config->ref);

	strlcpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid));

	mutex_lock(&perf->metrics_lock);

	oa_config->id = idr_alloc(&perf->metrics_idr, oa_config, 2, 0, GFP_KERNEL);
	if (oa_config->id < 0) {
		mutex_unlock(&perf->metrics_lock);
		i915_oa_config_put(oa_config);
		return -ENOMEM;
	}

	mutex_unlock(&perf->metrics_lock);

	return 0;
}

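/* Remove the test config from the IDR and drop the final reference */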
static void
destroy_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = tmp;
			break;
		}
	}

	if (oa_config)
		idr_remove(&perf->metrics_idr, oa_config->id);

	mutex_unlock(&perf->metrics_lock);

	if (oa_config)
		i915_oa_config_put(oa_config);
}

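/* Find the test config by its UUID, returning a new reference to it */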
static struct i915_oa_config *
get_empty_config(struct i915_perf *perf)
{
	struct i915_oa_config *oa_config = NULL, *tmp;
	int id;

	mutex_lock(&perf->metrics_lock);

	idr_for_each_entry(&perf->metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) {
			oa_config = i915_oa_config_get(tmp);
			break;
		}
	}

	mutex_unlock(&perf->metrics_lock);

	return oa_config;
}

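/*
 * Open a minimal OA stream on the render engine using the test config,
 * mirroring what the perf open ioctl would set up for SAMPLE_OA_REPORT.
 */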
static struct i915_perf_stream *
test_stream(struct i915_perf *perf)
{
	struct drm_i915_perf_open_param param = {};
	struct i915_oa_config *oa_config = get_empty_config(perf);
	struct perf_open_properties props = {
		.engine = intel_engine_lookup_user(perf->i915,
						   I915_ENGINE_CLASS_RENDER,
						   0),
		.sample_flags = SAMPLE_OA_REPORT,
		.oa_format = IS_GEN(perf->i915, 12) ?
		I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
	};
	struct i915_perf_stream *stream;

	if (!oa_config)
		return NULL;

	props.metrics_set = oa_config->id;

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		i915_oa_config_put(oa_config);
		return NULL;
	}

	stream->perf = perf;

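	/* i915_oa_stream_init() expects to be called under perf->lock */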
	mutex_lock(&perf->lock);
	if (i915_oa_stream_init(stream, &param, &props)) {
		kfree(stream);
		stream = NULL;
	}
	mutex_unlock(&perf->lock);

	i915_oa_config_put(oa_config);

	return stream;
}

static void stream_destroy(struct i915_perf_stream *stream)
{
	struct i915_perf *perf = stream->perf;

	mutex_lock(&perf->lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&perf->lock);
}

static int live_sanitycheck(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;

	/* Quick check we can create a perf stream */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -EINVAL;

	stream_destroy(stream);
	return 0;
}

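/*
 * Emit a PIPE_CONTROL that writes the CS timestamp into the engine's
 * status page; STORE_DATA_INDEX makes the write address an offset into
 * the HWSP, so 'slot' is a dword index into that page.
 */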
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 *cs;
	int len;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

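	/* gen8+ PIPE_CONTROL takes a 64b address and is one dword longer */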
	len = 5;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		len++;

	*cs++ = GFX_OP_PIPE_CONTROL(len);
	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
		PIPE_CONTROL_STORE_DATA_INDEX |
		PIPE_CONTROL_WRITE_TIMESTAMP;
	*cs++ = slot * sizeof(u32);
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

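/*
 * Spin on the CPU until the timestamp lands in the status page (or the
 * request completes), then sample the CPU clock.
 */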
static ktime_t poll_status(struct i915_request *rq, int slot)
{
	while (!intel_read_status_page(rq->engine, slot) &&
	       !i915_request_completed(rq))
		cpu_relax();

	return ktime_get();
}

static int live_noa_delay(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct i915_request *rq;
	ktime_t t0, t1;
	u64 expected;
	u32 delay;
	int err;
	int i;

	/* Check that the GPU delay matches expectations */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	expected = atomic64_read(&stream->perf->noa_programming_delay);

	if (stream->engine->class != RENDER_CLASS) {
		err = -ENODEV;
		goto out;
	}

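	/* Clear the timestamp slots in the status page before sampling */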
	for (i = 0; i < 4; i++)
		intel_write_status_page(stream->engine, 0x100 + i, 0);

	rq = intel_engine_create_kernel_request(stream->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
	}

	err = write_timestamp(rq, 0x100);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

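	/* Execute the noa_wait delay batch between the two timestamps */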
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	err = write_timestamp(rq, 0x102);
	if (err) {
		i915_request_add(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	preempt_disable();
	t0 = poll_status(rq, 0x100);
	t1 = poll_status(rq, 0x102);
	preempt_enable();

	pr_info("CPU delay: %lluns, expected %lluns\n",
		ktime_sub(t1, t0), expected);

	delay = intel_read_status_page(stream->engine, 0x102);
	delay -= intel_read_status_page(stream->engine, 0x100);
	delay = i915_cs_timestamp_ticks_to_ns(i915, delay);
	pr_info("GPU delay: %uns, expected %lluns\n",
		delay, expected);

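	/* Require the measured delay to fall within [75%, 150%] of expected */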
	if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
		pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
		       delay / 1000,
		       div_u64(3 * expected, 4000),
		       div_u64(3 * expected, 2000));
		err = -EINVAL;
	}

	i915_request_put(rq);
out:
	stream_destroy(stream);
	return err;
}

static int live_noa_gpr(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_perf_stream *stream;
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs, *store;
	void *scratch;
	u32 gpr0;
	int err;
	int i;

	/* Check that the delay does not clobber user context state (GPR) */

	stream = test_stream(&i915->perf);
	if (!stream)
		return -ENOMEM;

	gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0));

	ce = intel_context_create(stream->engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out;
	}

	/* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */
	scratch = kmap(ce->vm->scratch[0].base.page);
	memset(scratch, POISON_FREE, PAGE_SIZE);

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	i915_request_get(rq);

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err) {
			i915_request_add(rq);
			goto out_rq;
		}
	}

	/* Fill the 16 qword [32 dword] GPR with a known unlikely value */
	cs = intel_ring_begin(rq, 2 * 32 + 2);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		i915_request_add(rq);
		goto out_rq;
	}

	*cs++ = MI_LOAD_REGISTER_IMM(32);
	for (i = 0; i < 32; i++) {
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	/* Execute the GPU delay */
	err = rq->engine->emit_bb_start(rq,
					i915_ggtt_offset(stream->noa_wait), 0,
					I915_DISPATCH_SECURE);
	if (err) {
		i915_request_add(rq);
		goto out_rq;
	}

	/* Read the GPR back, using the pinned global HWSP for convenience */
	store = memset32(rq->engine->status_page.addr + 512, 0, 32);
	for (i = 0; i < 32; i++) {
		u32 cmd;

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			i915_request_add(rq);
			goto out_rq;
		}

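		/* gen8+ SRM takes a 64b address, so the command is one dword longer */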
		cmd = MI_STORE_REGISTER_MEM;
		if (INTEL_GEN(i915) >= 8)
			cmd++;
		cmd |= MI_USE_GGTT;

		*cs++ = cmd;
		*cs++ = gpr0 + i * sizeof(u32);
		*cs++ = i915_ggtt_offset(rq->engine->status_page.vma) +
			offset_in_page(store) +
			i * sizeof(u32);
		*cs++ = 0;
		intel_ring_advance(rq, cs);
	}

	i915_request_add(rq);

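	/*
	 * If the batch never completes, wedge the GPU so the remaining
	 * selftests are not stuck behind a hung request.
	 */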
	if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) {
		pr_err("noa_wait timed out\n");
		intel_gt_set_wedged(stream->engine->gt);
		err = -EIO;
		goto out_rq;
	}

	/* Verify that the GPRs contain our expected values */
	for (i = 0; i < 32; i++) {
		if (store[i] == STACK_MAGIC)
			continue;

		pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n",
		       i, store[i], STACK_MAGIC);
		err = -EINVAL;
	}

	/* Verify that the user's scratch page was not used for GPR storage */
	if (memchr_inv(scratch, POISON_FREE, PAGE_SIZE)) {
		pr_err("Scratch page overwritten!\n");
		igt_hexdump(scratch, 4096);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	kunmap(ce->vm->scratch[0].base.page);
	intel_context_put(ce);
out:
	stream_destroy(stream);
	return err;
}

int i915_perf_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_sanitycheck),
		SUBTEST(live_noa_delay),
		SUBTEST(live_noa_gpr),
	};
	struct i915_perf *perf = &i915->perf;
	int err;

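	/* Skip if perf was not initialised or OA is unsupported here */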
	if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
		return 0;

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	err = alloc_empty_config(&i915->perf);
	if (err)
		return err;

	err = i915_subtests(tests, i915);

	destroy_empty_config(&i915->perf);

	return err;
}