// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gem/i915_gem_lmem.h"

#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "i915_perf.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_regs.h"
#include "intel_lrc.h"
#include "intel_lrc_reg.h"
#include "intel_ring.h"
#include "shmem_utils.h"

/*
 * The per-platform tables are u8-encoded in @data. Decode @data and set the
 * addresses' offset and commands in @regs. The following encoding is used
 * for each byte. There are two steps: decoding commands and decoding addresses.
 *
 * Commands:
 * [7]: create NOPs - the number of NOPs is set in the lower bits
 * [6]: when creating an MI_LOAD_REGISTER_IMM command, allow
 *      MI_LRI_FORCE_POSTED to be set
 * [5:0]: number of NOPs, or number of registers to set values for in the
 *        case of MI_LOAD_REGISTER_IMM
 *
 * Addresses: these are decoded after an MI_LOAD_REGISTER_IMM command, "count"
 * registers at a time. They are set by using the REG/REG16 macros: the former
 * is used for offsets smaller than 0x200 while the latter is for offsets
 * bigger than that. Those macros already set all the bits documented below
 * correctly:
 *
 * [7]: set when the register offset does not fit in a single byte; additional
 *      bytes follow, carrying the lower bits
 * [6:0]: register offset, without considering the engine base
 *
 * This function only tweaks the commands and register offsets. Values are not
 * filled out.
 */
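/*
 * Illustrative decode (not part of the original table data): assuming a
 * hypothetical sequence NOP(1), LRI(2, POSTED), REG(0x034), REG16(0x244),
 * the encoded bytes and their decoding would be:
 *
 *   0x81        -> bit [7] set, count 1: skip one dword in @regs
 *   0x42        -> LRI command: count = 2, flags = POSTED, so emit
 *                  MI_LOAD_REGISTER_IMM(2) | MI_LRI_FORCE_POSTED
 *                  (plus MI_LRI_LRM_CS_MMIO on Gen11+)
 *   0x0d        -> single-byte offset: 0x0d << 2 = 0x034, i.e. base + 0x34
 *   0x81, 0x11  -> two-byte offset: ((0x01 << 7) | 0x11) << 2 = 0x244,
 *                  i.e. base + 0x244
 *
 * The value dwords following each offset are left untouched here and are
 * filled in later (by the HW context save or the golden context image).
 */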
static void set_offsets(u32 *regs,
                        const u8 *data,
                        const struct intel_engine_cs *engine,
                        bool close)
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
        (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
        (((x) >> 2) & 0x7f)
#define END 0
{
        const u32 base = engine->mmio_base;

        while (*data) {
                u8 count, flags;

                if (*data & BIT(7)) { /* skip */
                        count = *data++ & ~BIT(7);
                        regs += count;
                        continue;
                }

                count = *data & 0x3f;
                flags = *data >> 6;
                data++;

                *regs = MI_LOAD_REGISTER_IMM(count);
                if (flags & POSTED)
                        *regs |= MI_LRI_FORCE_POSTED;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= MI_LRI_LRM_CS_MMIO;
                regs++;

                GEM_BUG_ON(!count);
                do {
                        u32 offset = 0;
                        u8 v;

                        do {
                                v = *data++;
                                offset <<= 7;
                                offset |= v & ~BIT(7);
                        } while (v & BIT(7));

                        regs[0] = base + (offset << 2);
                        regs += 2;
                } while (--count);
        }

        if (close) {
                /* Close the batch; used mainly by live_lrc_layout() */
                *regs = MI_BATCH_BUFFER_END;
                if (GRAPHICS_VER(engine->i915) >= 11)
                        *regs |= BIT(0);
        }
}

static const u8 gen8_xcs_offsets[] = {
        NOP(1),
        LRI(11, 0),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),

        NOP(9),
        LRI(9, 0),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(2, 0),
        REG16(0x200),
        REG(0x028),

        END
};

static const u8 gen9_xcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, POSTED),
        REG16(0x200),

        NOP(13),
        LRI(44, POSTED),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),

        END
};

static const u8 gen12_xcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 dg2_xcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        END
};

static const u8 gen8_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen9_rcs_offsets[] = {
        NOP(1),
        LRI(14, POSTED),
        REG16(0x244),
        REG(0x34),
        REG(0x30),
        REG(0x38),
        REG(0x3c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),

        NOP(3),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(13),
        LRI(1, 0),
        REG(0xc8),

        NOP(13),
        LRI(44, POSTED),
        REG(0x28),
        REG(0x9c),
        REG(0xc0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x68),

        END
};

static const u8 gen11_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x11c),
        REG(0x114),
        REG(0x118),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(1, POSTED),
        REG(0x1b0),

        NOP(10),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 gen12_rcs_offsets[] = {
        NOP(1),
        LRI(13, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),

        NOP(5),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),
        NOP(3 + 9 + 1),

        LRI(51, POSTED),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG16(0x588),
        REG(0x028),
        REG(0x09c),
        REG(0x0c0),
        REG(0x178),
        REG(0x17c),
        REG16(0x358),
        REG(0x170),
        REG(0x150),
        REG(0x154),
        REG(0x158),
        REG16(0x41c),
        REG16(0x600),
        REG16(0x604),
        REG16(0x608),
        REG16(0x60c),
        REG16(0x610),
        REG16(0x614),
        REG16(0x618),
        REG16(0x61c),
        REG16(0x620),
        REG16(0x624),
        REG16(0x628),
        REG16(0x62c),
        REG16(0x630),
        REG16(0x634),
        REG16(0x638),
        REG16(0x63c),
        REG16(0x640),
        REG16(0x644),
        REG16(0x648),
        REG16(0x64c),
        REG16(0x650),
        REG16(0x654),
        REG16(0x658),
        REG16(0x65c),
        REG16(0x660),
        REG16(0x664),
        REG16(0x668),
        REG16(0x66c),
        REG16(0x670),
        REG16(0x674),
        REG16(0x678),
        REG16(0x67c),
        REG(0x068),
        REG(0x084),
        NOP(1),

        END
};

static const u8 dg2_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        LRI(3, POSTED),
        REG(0x1b0),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

static const u8 mtl_rcs_offsets[] = {
        NOP(1),
        LRI(15, POSTED),
        REG16(0x244),
        REG(0x034),
        REG(0x030),
        REG(0x038),
        REG(0x03c),
        REG(0x168),
        REG(0x140),
        REG(0x110),
        REG(0x1c0),
        REG(0x1c4),
        REG(0x1c8),
        REG(0x180),
        REG16(0x2b4),
        REG(0x120),
        REG(0x124),

        NOP(1),
        LRI(9, POSTED),
        REG16(0x3a8),
        REG16(0x28c),
        REG16(0x288),
        REG16(0x284),
        REG16(0x280),
        REG16(0x27c),
        REG16(0x278),
        REG16(0x274),
        REG16(0x270),

        NOP(2),
        LRI(2, POSTED),
        REG16(0x5a8),
        REG16(0x5ac),

        NOP(6),
        LRI(1, 0),
        REG(0x0c8),

        END
};

#undef END
#undef REG16
#undef REG
#undef LRI
#undef NOP

static const u8 *reg_offsets(const struct intel_engine_cs *engine)
{
        /*
         * The gen12+ lists only have the registers we program in the basic
         * default state. We rely on the context image using relative
         * addressing to automatically fix up the register state between the
         * physical engines for a virtual engine.
         */
        GEM_BUG_ON(GRAPHICS_VER(engine->i915) >= 12 &&
                   !intel_engine_has_relative_mmio(engine));

        if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
                if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
                        return mtl_rcs_offsets;
                else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 11)
                        return gen11_rcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_rcs_offsets;
                else
                        return gen8_rcs_offsets;
        } else {
                if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                        return dg2_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 12)
                        return gen12_xcs_offsets;
                else if (GRAPHICS_VER(engine->i915) >= 9)
                        return gen9_xcs_offsets;
                else
                        return gen8_xcs_offsets;
        }
}

static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                return 0x70;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x60;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return 0x54;
        else if (engine->class == RENDER_CLASS)
                return 0x58;
        else
                return -1;
}

static int lrc_ring_bb_offset(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                return 0x80;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x70;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return 0x64;
        else if (GRAPHICS_VER(engine->i915) >= 8 &&
                 engine->class == RENDER_CLASS)
                return 0xc4;
        else
                return -1;
}

static int lrc_ring_gpr0(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                return 0x84;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x74;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return 0x68;
        else if (engine->class == RENDER_CLASS)
                return 0xd8;
        else
                return -1;
}

static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER(engine->i915) >= 12)
                return 0x12;
        else if (GRAPHICS_VER(engine->i915) >= 9 || engine->class == RENDER_CLASS)
                return 0x18;
        else
                return -1;
}

static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_wa_bb_per_ctx(engine);
        if (x < 0)
                return x;

        return x + 2;
}

static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_indirect_ptr(engine);
        if (x < 0)
                return x;

        return x + 2;
}

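/*
 * For reference (values derived from the helpers above, not quoted from
 * Bspec): on Gen12+ the PER_CTX_BB control dword sits at index 0x12 of the
 * register state, INDIRECT_CTX_PTR at 0x14 and INDIRECT_CTX_OFFSET at 0x16;
 * on the earlier platforms that support these fields the corresponding chain
 * is 0x18, 0x1a and 0x1c.
 */
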
static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
                /*
                 * Note that the CSFE context has a dummy slot for CMD_BUF_CCTL
                 * simply to match the RCS context image layout.
                 */
                return 0xc6;
        else if (engine->class != RENDER_CLASS)
                return -1;
        else if (GRAPHICS_VER(engine->i915) >= 12)
                return 0xb6;
        else if (GRAPHICS_VER(engine->i915) >= 11)
                return 0xaa;
        else
                return -1;
}

static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
        if (GRAPHICS_VER(engine->i915) >= 12)
                return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 11)
                return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 9)
                return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
        else if (GRAPHICS_VER(engine->i915) >= 8)
                return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;

        GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);

        return 0;
}

static void
lrc_setup_bb_per_ctx(u32 *regs,
                     const struct intel_engine_cs *engine,
                     u32 ctx_bb_ggtt_addr)
{
        GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
        regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
                ctx_bb_ggtt_addr |
                PER_CTX_BB_FORCE |
                PER_CTX_BB_VALID;
}

static void
lrc_setup_indirect_ctx(u32 *regs,
                       const struct intel_engine_cs *engine,
                       u32 ctx_bb_ggtt_addr,
                       u32 size)
{
        GEM_BUG_ON(!size);
        GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
        GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
        regs[lrc_ring_indirect_ptr(engine) + 1] =
                ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);

        GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
        regs[lrc_ring_indirect_offset(engine) + 1] =
                lrc_ring_indirect_offset_default(engine) << 6;
}

static bool ctx_needs_runalone(const struct intel_context *ce)
{
        struct i915_gem_context *gem_ctx;
        bool ctx_is_protected = false;

        /*
         * Wa_14019159160 - Case 2.
         * On some platforms, protected contexts require setting
         * the LRC run-alone bit or else the encryption/decryption will not happen.
         * NOTE: Case 2 only applies to the PXP use-case of said workaround.
         */
        if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) &&
            (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) {
                rcu_read_lock();
                gem_ctx = rcu_dereference(ce->gem_context);
                if (gem_ctx)
                        ctx_is_protected = gem_ctx->uses_protected_content;
                rcu_read_unlock();
        }

        return ctx_is_protected;
}

static void init_common_regs(u32 * const regs,
                             const struct intel_context *ce,
                             const struct intel_engine_cs *engine,
                             bool inhibit)
{
        u32 ctl;
        int loc;

        ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
        ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
        if (inhibit)
                ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
        if (GRAPHICS_VER(engine->i915) < 11)
                ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
                                           CTX_CTRL_RS_CTX_ENABLE);
        /* Wa_14019159160 - Case 2.*/
        if (ctx_needs_runalone(ce))
                ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE);
        regs[CTX_CONTEXT_CONTROL] = ctl;

        regs[CTX_TIMESTAMP] = ce->stats.runtime.last;

        loc = lrc_ring_bb_offset(engine);
        if (loc != -1)
                regs[loc + 1] = 0;
}

static void init_wa_bb_regs(u32 * const regs,
                            const struct intel_engine_cs *engine)
{
        const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;

        if (wa_ctx->per_ctx.size) {
                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);

                GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
                regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
                        (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
        }

        if (wa_ctx->indirect_ctx.size) {
                lrc_setup_indirect_ctx(regs, engine,
                                       i915_ggtt_offset(wa_ctx->vma) +
                                       wa_ctx->indirect_ctx.offset,
                                       wa_ctx->indirect_ctx.size);
        }
}

static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt)
{
        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                /* 64b PPGTT (48bit canonical)
                 * PDP0_DESCRIPTOR contains the base address to PML4 and
                 * other PDP Descriptors are ignored.
                 */
                ASSIGN_CTX_PML4(ppgtt, regs);
        } else {
                ASSIGN_CTX_PDP(ppgtt, regs, 3);
                ASSIGN_CTX_PDP(ppgtt, regs, 2);
                ASSIGN_CTX_PDP(ppgtt, regs, 1);
                ASSIGN_CTX_PDP(ppgtt, regs, 0);
        }
}

static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
{
        if (i915_is_ggtt(vm))
                return i915_vm_to_ggtt(vm)->alias;
        else
                return i915_vm_to_ppgtt(vm);
}

static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
        int x;

        x = lrc_ring_mi_mode(engine);
        if (x != -1) {
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
        }
}

static void __lrc_init_regs(u32 *regs,
                            const struct intel_context *ce,
                            const struct intel_engine_cs *engine,
                            bool inhibit)
{
        /*
         * A context is actually a big batch buffer with several
         * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
         * values we are setting here are only for the first context restore:
         * on a subsequent save, the GPU will recreate this batchbuffer with new
         * values (including all the missing MI_LOAD_REGISTER_IMM commands that
         * we are not initializing here).
         *
         * Must keep consistent with virtual_update_register_offsets().
         */

        if (inhibit)
                memset(regs, 0, PAGE_SIZE);

        set_offsets(regs, reg_offsets(engine), engine, inhibit);

        init_common_regs(regs, ce, engine, inhibit);
        init_ppgtt_regs(regs, vm_alias(ce->vm));

        init_wa_bb_regs(regs, engine);

        __reset_stop_ring(regs, engine);
}

void lrc_init_regs(const struct intel_context *ce,
                   const struct intel_engine_cs *engine,
                   bool inhibit)
{
        __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit);
}

void lrc_reset_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine)
{
        __reset_stop_ring(ce->lrc_reg_state, engine);
}

static void
set_redzone(void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
}

static void
check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
{
        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        vaddr += engine->context_size;

        if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
                drm_err_once(&engine->i915->drm,
                             "%s context redzone overwritten!\n",
                             engine->name);
}

static u32 context_wa_bb_offset(const struct intel_context *ce)
{
        return PAGE_SIZE * ce->wa_bb_page;
}

/*
 * per_ctx below determines which WABB section is used.
 * When true, the function returns the location of the
 * PER_CTX_BB. When false, the function returns the
 * location of the INDIRECT_CTX.
 */
static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
{
        void *ptr;

        GEM_BUG_ON(!ce->wa_bb_page);

        ptr = ce->lrc_reg_state;
        ptr -= LRC_STATE_OFFSET; /* back to start of context image */
        ptr += context_wa_bb_offset(ce);
        ptr += per_ctx ? PAGE_SIZE : 0;

        return ptr;
}

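/*
 * Layout note (inferred from context_wabb() above and __lrc_alloc_state()
 * below, not quoted from Bspec): the INDIRECT_CTX workaround batch occupies
 * the page at ce->wa_bb_page and the PER_CTX_BB batch occupies the page
 * immediately after it.
 */
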
void lrc_init_state(struct intel_context *ce,
                    struct intel_engine_cs *engine,
                    void *state)
{
        bool inhibit = true;

        set_redzone(state, engine);

        if (ce->default_state) {
                shmem_read(ce->default_state, 0, state, engine->context_size);
                __set_bit(CONTEXT_VALID_BIT, &ce->flags);
                inhibit = false;
        }

        /* Clear the ppHWSP (inc. per-context counters) */
        memset(state, 0, PAGE_SIZE);

        /* Clear the indirect wa and storage */
        if (ce->wa_bb_page)
                memset(state + context_wa_bb_offset(ce), 0, PAGE_SIZE);

        /*
         * The second page of the context object contains some registers which
         * must be set up prior to the first execution.
         */
        __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit);
}

u32 lrc_indirect_bb(const struct intel_context *ce)
{
        return i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce);
}

static u32 *setup_predicate_disable_wa(const struct intel_context *ce, u32 *cs)
{
        /* If predication is active, this will be noop'ed */
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
        *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;
        *cs++ = 0; /* No predication */

        /* predicated end, only terminates if SET_PREDICATE_RESULT:0 is clear */
        *cs++ = MI_BATCH_BUFFER_END | BIT(15);
        *cs++ = MI_SET_PREDICATE | MI_SET_PREDICATE_DISABLE;

        /* Instructions are no longer predicated (disabled), we can proceed */
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT | (4 - 2);
        *cs++ = lrc_indirect_bb(ce) + DG2_PREDICATE_RESULT_WA;
        *cs++ = 0;
        *cs++ = 1; /* enable predication before the next BB */

        *cs++ = MI_BATCH_BUFFER_END;
        GEM_BUG_ON(offset_in_page(cs) > DG2_PREDICATE_RESULT_WA);

        return cs;
}

static struct i915_vma *
__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        u32 context_size;

        context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);

        if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                context_size += I915_GTT_PAGE_SIZE; /* for redzone */

        if (GRAPHICS_VER(engine->i915) >= 12) {
                ce->wa_bb_page = context_size / PAGE_SIZE;
                /* INDIRECT_CTX and PER_CTX_BB need separate pages. */
                context_size += PAGE_SIZE * 2;
        }

        if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
                ce->parallel.guc.parent_page = context_size / PAGE_SIZE;
                context_size += PARENT_SCRATCH_SIZE;
        }

        obj = i915_gem_object_create_lmem(engine->i915, context_size,
                                          I915_BO_ALLOC_PM_VOLATILE);
        if (IS_ERR(obj)) {
                obj = i915_gem_object_create_shmem(engine->i915, context_size);
                if (IS_ERR(obj))
                        return ERR_CAST(obj);

                /*
                 * Wa_22016122933: For Media version 13.0, all Media GT shared
                 * memory needs to be mapped as WC on CPU side and UC (PAT
                 * index 2) on GPU side.
                 */
                if (intel_gt_needs_wa_22016122933(engine->gt))
                        i915_gem_object_set_cache_coherency(obj, I915_CACHE_NONE);
        }

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                i915_gem_object_put(obj);
                return vma;
        }

        return vma;
}

static struct intel_timeline *
pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_timeline *tl = fetch_and_zero(&ce->timeline);

        return intel_timeline_create_from_engine(engine, page_unmask_bits(tl));
}

int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine)
{
        struct intel_ring *ring;
        struct i915_vma *vma;
        int err;

        GEM_BUG_ON(ce->state);

        if (!intel_context_has_own_state(ce))
                ce->default_state = engine->default_state;

        vma = __lrc_alloc_state(ce, engine);
        if (IS_ERR(vma))
                return PTR_ERR(vma);

        ring = intel_engine_create_ring(engine, ce->ring_size);
        if (IS_ERR(ring)) {
                err = PTR_ERR(ring);
                goto err_vma;
        }

        if (!page_mask_bits(ce->timeline)) {
                struct intel_timeline *tl;

                /*
                 * Use the static global HWSP for the kernel context, and
                 * a dynamically allocated cacheline for everyone else.
                 */
                if (unlikely(ce->timeline))
                        tl = pinned_timeline(ce, engine);
                else
                        tl = intel_timeline_create(engine->gt);
                if (IS_ERR(tl)) {
                        err = PTR_ERR(tl);
                        goto err_ring;
                }

                ce->timeline = tl;
        }

        ce->ring = ring;
        ce->state = vma;

        return 0;

err_ring:
        intel_ring_put(ring);
err_vma:
        i915_vma_put(vma);
        return err;
}

void lrc_reset(struct intel_context *ce)
{
        GEM_BUG_ON(!intel_context_is_pinned(ce));

        intel_ring_reset(ce->ring, ce->ring->emit);

        /* Scrub away the garbage */
        lrc_init_regs(ce, ce->engine, true);
        ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail);
}

int
lrc_pre_pin(struct intel_context *ce,
            struct intel_engine_cs *engine,
            struct i915_gem_ww_ctx *ww,
            void **vaddr)
{
        GEM_BUG_ON(!ce->state);
        GEM_BUG_ON(!i915_vma_is_pinned(ce->state));

        *vaddr = i915_gem_object_pin_map(ce->state->obj,
                                         intel_gt_coherent_map_type(ce->engine->gt,
                                                                    ce->state->obj,
                                                                    false) |
                                         I915_MAP_OVERRIDE);

        return PTR_ERR_OR_ZERO(*vaddr);
}

int
lrc_pin(struct intel_context *ce,
        struct intel_engine_cs *engine,
        void *vaddr)
{
        ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET;

        if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags))
                lrc_init_state(ce, engine, vaddr);

        ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail);
        return 0;
}

void lrc_unpin(struct intel_context *ce)
{
        if (unlikely(ce->parallel.last_rq)) {
                i915_request_put(ce->parallel.last_rq);
                ce->parallel.last_rq = NULL;
        }
        check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET,
                      ce->engine);
}

void lrc_post_unpin(struct intel_context *ce)
{
        i915_gem_object_unpin_map(ce->state->obj);
}

void lrc_fini(struct intel_context *ce)
{
        if (!ce->state)
                return;

        intel_ring_put(fetch_and_zero(&ce->ring));
        i915_vma_put(fetch_and_zero(&ce->state));
}

void lrc_destroy(struct kref *kref)
{
        struct intel_context *ce = container_of(kref, typeof(*ce), ref);

        GEM_BUG_ON(!i915_active_is_idle(&ce->active));
        GEM_BUG_ON(intel_context_is_pinned(ce));

        lrc_fini(ce);

        intel_context_fini(ce);
        intel_context_free(ce);
}

static u32 *
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                CTX_TIMESTAMP * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));

        return cs;
}

static u32 *
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        return cs;
}

static u32 *
gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs)
{
        GEM_BUG_ON(lrc_ring_cmd_buf_cctl(ce->engine) == -1);

        *cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
                MI_SRM_LRM_GLOBAL_GTT |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
                (lrc_ring_cmd_buf_cctl(ce->engine) + 1) * sizeof(u32);
        *cs++ = 0;

        *cs++ = MI_LOAD_REGISTER_REG |
                MI_LRR_SOURCE_CS_MMIO |
                MI_LRI_LRM_CS_MMIO;
        *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
        *cs++ = i915_mmio_reg_offset(RING_CMD_BUF_CCTL(0));

        return cs;
}

/*
 * The bspec's tuning guide asks us to program a vertical watermark value of
 * 0x3FF. However this register is not saved/restored properly by the
 * hardware, so we're required to apply the desired value via INDIRECT_CTX
 * batch buffer to ensure the value takes effect properly. All other bits
 * in this register should remain at 0 (the hardware default).
 */
static u32 *
dg2_emit_draw_watermark_setting(u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(DRAW_WATERMARK);
        *cs++ = REG_FIELD_PREP(VERT_WM_VAL, 0x3FF);

        return cs;
}

static u32 *
gen12_invalidate_state_cache(u32 *cs)
{
        *cs++ = MI_LOAD_REGISTER_IMM(1);
        *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2);
        *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
        return cs;
}

static u32 *
gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_cmd_buf_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        /* Wa_16013000631:dg2 */
        if (IS_DG2_G11(ce->engine->i915))
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);

        cs = gen12_emit_aux_table_inv(ce->engine, cs);

        /* Wa_18022495364 */
        if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10)))
                cs = gen12_invalidate_state_cache(cs);

        /* Wa_16014892111 */
        if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
            IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
            IS_DG2(ce->engine->i915))
                cs = dg2_emit_draw_watermark_setting(cs);

        return cs;
}

static u32 *
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
{
        cs = gen12_emit_timestamp_wa(ce, cs);
        cs = gen12_emit_restore_scratch(ce, cs);

        /* Wa_16013000631:dg2 */
        if (IS_DG2_G11(ce->engine->i915))
                if (ce->engine->class == COMPUTE_CLASS)
                        cs = gen8_emit_pipe_control(cs,
                                                    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
                                                    0);

        return gen12_emit_aux_table_inv(ce->engine, cs);
}

static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
{
        struct intel_gt *gt = ce->engine->gt;
        int mocs = gt->mocs.uc_index << 1;

        /**
         * Wa_16018031267 / Wa_16018063123 requires that SW forces the
         * main copy engine arbitration into round robin mode. We
         * additionally need to submit the following WABB blt command
         * to produce 4 subblits with each subblit generating 0 byte
         * write requests as WABB:
         *
         * XY_FASTCOLOR_BLT
         *  BG0    -> 5100000E
         *  BG1    -> 0000003F (Dest pitch)
         *  BG2    -> 00000000 (X1, Y1) = (0, 0)
         *  BG3    -> 00040001 (X2, Y2) = (1, 4)
         *  BG4    -> scratch
         *  BG5    -> scratch
         *  BG6-12 -> 00000000
         *  BG13   -> 20004004 (Surf. Width = 2, Surf. Height = 5)
         *  BG14   -> 00000010 (Qpitch = 4)
         *  BG15   -> 00000000
         */
        *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
        *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
        *cs++ = 0;
        *cs++ = 4 << 16 | 1;
        *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
        *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0;
        *cs++ = 0x20004004;
        *cs++ = 0x10;
        *cs++ = 0;

        return cs;
}

static u32 *
xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
{
        /* Wa_16018031267, Wa_16018063123 */
        if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
                cs = xehp_emit_fastcolor_blt_wabb(ce, cs);

        return cs;
}

static void
setup_per_ctx_bb(const struct intel_context *ce,
                 const struct intel_engine_cs *engine,
                 u32 *(*emit)(const struct intel_context *, u32 *))
{
        /* Place PER_CTX_BB on next page after INDIRECT_CTX */
        u32 * const start = context_wabb(ce, true);
        u32 *cs;

        cs = emit(ce, start);

        /* PER_CTX_BB must manually terminate */
        *cs++ = MI_BATCH_BUFFER_END;

        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
        lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
                             lrc_indirect_bb(ce) + PAGE_SIZE);
}

static void
setup_indirect_ctx_bb(const struct intel_context *ce,
                      const struct intel_engine_cs *engine,
                      u32 *(*emit)(const struct intel_context *, u32 *))
{
        u32 * const start = context_wabb(ce, false);
        u32 *cs;

        cs = emit(ce, start);
        GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
        while ((unsigned long)cs % CACHELINE_BYTES)
                *cs++ = MI_NOOP;

        GEM_BUG_ON(cs - start > DG2_PREDICATE_RESULT_BB / sizeof(*start));
        setup_predicate_disable_wa(ce, start + DG2_PREDICATE_RESULT_BB / sizeof(*start));

        lrc_setup_indirect_ctx(ce->lrc_reg_state, engine,
                               lrc_indirect_bb(ce),
                               (cs - start) * sizeof(*cs));
}

/*
 * The context descriptor encodes various attributes of a context,
 * including its GTT address and some flags. Because it's fairly
 * expensive to calculate, we'll just do it once and cache the result,
 * which remains valid until the context is unpinned.
 *
 * This is what a descriptor looks like, from LSB to MSB::
 *
 *      bits  0-11:    flags, GEN8_CTX_* (cached in ctx->desc_template)
 *      bits 12-31:    LRCA, GTT address of (the HWSP of) this context
 *      bits 32-52:    ctx ID, a globally unique tag (highest bit used by GuC)
 *      bits 53-54:    mbz, reserved for use by hardware
 *      bits 55-63:    group ID, currently unused and set to 0
 *
 * Starting from Gen11, the upper dword of the descriptor has a new format:
 *
 *      bits 32-36:    reserved
 *      bits 37-47:    SW context ID
 *      bits 48-53:    engine instance
 *      bit  54:       mbz, reserved for use by hardware
 *      bits 55-60:    SW counter
 *      bits 61-63:    engine class
 *
 * On Xe_HP, the upper dword of the descriptor has a new format:
 *
 *      bits 32-37:    virtual function number
 *      bit  38:       mbz, reserved for use by hardware
 *      bits 39-54:    SW context ID
 *      bits 55-57:    reserved
 *      bits 58-63:    SW counter
 *
 * engine info, SW context ID and SW counter need to form a unique number
 * (Context ID) per lrc.
 */
static u32 lrc_descriptor(const struct intel_context *ce)
{
        u32 desc;

        desc = INTEL_LEGACY_32B_CONTEXT;
        if (i915_vm_is_4lvl(ce->vm))
                desc = INTEL_LEGACY_64B_CONTEXT;
        desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;

        desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
        if (GRAPHICS_VER(ce->vm->i915) == 8)
                desc |= GEN8_CTX_L3LLC_COHERENT;

        return i915_ggtt_offset(ce->state) | desc;
}

u32 lrc_update_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    u32 head)
{
        struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;

        GEM_BUG_ON(!intel_ring_offset_valid(ring, head));
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));

        regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
        regs[CTX_RING_HEAD] = head;
        regs[CTX_RING_TAIL] = ring->tail;
        regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;

        /* RPCS */
        if (engine->class == RENDER_CLASS) {
                regs[CTX_R_PWR_CLK_STATE] =
                        intel_sseu_make_rpcs(engine->gt, &ce->sseu);

                i915_oa_init_reg_state(ce, engine);
        }

        if (ce->wa_bb_page) {
                u32 *(*fn)(const struct intel_context *ce, u32 *cs);

                fn = gen12_emit_indirect_ctx_xcs;
                if (ce->engine->class == RENDER_CLASS)
                        fn = gen12_emit_indirect_ctx_rcs;

                /* Mutually exclusive wrt to global indirect bb */
                GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
                setup_indirect_ctx_bb(ce, engine, fn);
                setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
        }

        return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
}

void lrc_update_offsets(struct intel_context *ce,
                        struct intel_engine_cs *engine)
{
        set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false);
}

void lrc_check_regs(const struct intel_context *ce,
                    const struct intel_engine_cs *engine,
                    const char *when)
{
        const struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;
        bool valid = true;
        int x;

        if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
                pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_START],
                       i915_ggtt_offset(ring->vma));
                regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
                valid = false;
        }

        if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
            (RING_CTL_SIZE(ring->size) | RING_VALID)) {
                pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
                       engine->name,
                       regs[CTX_RING_CTL],
                       (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
                regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
                valid = false;
        }

        x = lrc_ring_mi_mode(engine);
        if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
                pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
                       engine->name, regs[x + 1]);
                regs[x + 1] &= ~STOP_RING;
                regs[x + 1] |= STOP_RING << 16;
                valid = false;
        }

        WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when);
}

/*
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
 * but there is a slight complication as this is applied in WA batch where the
 * values are only initialized once so we cannot take register value at the
 * beginning and reuse it further; hence we save its value to memory, upload a
 * constant value with bit21 set and then we restore it back with the saved value.
 * To simplify the WA, a constant value is formed by using the default value
 * of this register. This shouldn't be a problem because we are only modifying
 * it for a short period and this batch is non-preemptible. We can of course
 * use additional instructions that read the actual value of the register
 * at that time and set our bit of interest, but it makes the WA complicated.
 *
 * This WA is also required for Gen9 so extracting it as a function avoids
 * code duplication.
 */
static u32 *
gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
{
        /* NB no one else is allowed to scribble over scratch + 256! */
        *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        *batch++ = MI_LOAD_REGISTER_IMM(1);
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;

        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_DC_FLUSH_ENABLE,
                                       0);

        *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
        *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
        *batch++ = intel_gt_scratch_offset(engine->gt,
                                           INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
        *batch++ = 0;

        return batch;
}

/*
 * Typically we only have one indirect_ctx and per_ctx batch buffer which are
 * initialized at the beginning and shared across all contexts, but this field
 * helps us to have multiple batches at different offsets and select them based
 * on a criterion. At the moment this batch always starts at the beginning of
 * the page and at this point we don't have multiple wa_ctx batch buffers.
 *
 * The number of WA applied is not known at the beginning; we use this field
 * to return the number of DWORDS written.
 *
 * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
 * so it adds NOOPs as padding to make it cacheline aligned.
 * MI_BATCH_BUFFER_END will be added to the perctx batch and both of them
 * together make a complete batch buffer.
 */
static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        /* WaDisableCtxRestoreArbitration:bdw,chv */
        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
        if (IS_BROADWELL(engine->i915))
                batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
        /* Actual scratch location is at 128 bytes offset */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        /*
         * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
         * execution depends on the length specified in terms of cache lines
         * in the register CTX_RCS_INDIRECT_CTX
         */

        return batch;
}

struct lri {
        i915_reg_t reg;
        u32 value;
};

static u32 *emit_lri(u32 *batch, const struct lri *lri, unsigned int count)
{
        GEM_BUG_ON(!count || count > 63);

        *batch++ = MI_LOAD_REGISTER_IMM(count);
        do {
                *batch++ = i915_mmio_reg_offset(lri->reg);
                *batch++ = lri->value;
        } while (lri++, --count);
        *batch++ = MI_NOOP;

        return batch;
}

static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
{
        static const struct lri lri[] = {
                /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
                {
                        COMMON_SLICE_CHICKEN2,
                        __MASKED_FIELD(GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE,
                                       0),
                },

                /* BSpec: 11391 */
                {
                        FF_SLICE_CHICKEN,
                        __MASKED_FIELD(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX,
                                       FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX),
                },

                /* BSpec: 11299 */
                {
                        _3D_CHICKEN3,
                        __MASKED_FIELD(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX,
                                       _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX),
                }
        };

        *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
        batch = gen8_emit_flush_coherentl3_wa(engine, batch);

        /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
        batch = gen8_emit_pipe_control(batch,
                                       PIPE_CONTROL_FLUSH_L3 |
                                       PIPE_CONTROL_STORE_DATA_INDEX |
                                       PIPE_CONTROL_CS_STALL |
                                       PIPE_CONTROL_QW_WRITE,
                                       LRC_PPHWSP_SCRATCH_ADDR);

        batch = emit_lri(batch, lri, ARRAY_SIZE(lri));

        /* WaMediaPoolStateCmdInWABB:bxt,glk */
        if (HAS_POOLED_EU(engine->i915)) {
                /*
                 * EU pool configuration is set up along with the golden context
                 * during context initialization. This value depends on
                 * device type (2x6 or 3x6) and needs to be updated based
                 * on which subslice is disabled, especially for 2x6
                 * devices; however it is safe to load the default
                 * configuration of a 3x6 device instead of masking off
                 * the corresponding bits, because HW ignores bits of a disabled
                 * subslice and drops down to the appropriate config. Please
                 * see render_state_setup() in i915_gem_render_state.c for
                 * possible configurations; to avoid duplication they are
                 * not shown here again.
                 */
                *batch++ = GEN9_MEDIA_POOL_STATE;
                *batch++ = GEN9_MEDIA_POOL_ENABLE;
                *batch++ = 0x00777000;
                *batch++ = 0;
                *batch++ = 0;
                *batch++ = 0;
        }

        *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

        /* Pad to end of cacheline */
        while ((unsigned long)batch % CACHELINE_BYTES)
                *batch++ = MI_NOOP;

        return batch;
}

#define CTX_WA_BB_SIZE (PAGE_SIZE)

static int lrc_create_wa_ctx(struct intel_engine_cs *engine)
{
        struct drm_i915_gem_object *obj;
        struct i915_vma *vma;
        int err;

        obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE);
        if (IS_ERR(obj))
                return PTR_ERR(obj);

        vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
        if (IS_ERR(vma)) {
                err = PTR_ERR(vma);
                goto err;
        }

        engine->wa_ctx.vma = vma;
        return 0;

err:
        i915_gem_object_put(obj);
        return err;
}

void lrc_fini_wa_ctx(struct intel_engine_cs *engine)
{
        i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
}

typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);

void lrc_init_wa_ctx(struct intel_engine_cs *engine)
{
        struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
        struct i915_wa_ctx_bb *wa_bb[] = {
                &wa_ctx->indirect_ctx, &wa_ctx->per_ctx
        };
        wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)];
        struct i915_gem_ww_ctx ww;
        void *batch, *batch_ptr;
        unsigned int i;
        int err;

        if (GRAPHICS_VER(engine->i915) >= 11 ||
            !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
                return;

        if (GRAPHICS_VER(engine->i915) == 9) {
                wa_bb_fn[0] = gen9_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
        } else if (GRAPHICS_VER(engine->i915) == 8) {
                wa_bb_fn[0] = gen8_init_indirectctx_bb;
                wa_bb_fn[1] = NULL;
        }

        err = lrc_create_wa_ctx(engine);
        if (err) {
                /*
                 * We continue even if we fail to initialize the WA batch
                 * because we only expect rare glitches and nothing
                 * critical enough to prevent us from using the GPU.
                 */
                drm_err(&engine->i915->drm,
                        "Ignoring context switch w/a allocation error:%d\n",
                        err);
                return;
        }

        if (!engine->wa_ctx.vma)
                return;

        i915_gem_ww_ctx_init(&ww, true);
retry:
        err = i915_gem_object_lock(wa_ctx->vma->obj, &ww);
        if (!err)
                err = i915_ggtt_pin(wa_ctx->vma, &ww, 0, PIN_HIGH);
        if (err)
                goto err;

        batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB);
        if (IS_ERR(batch)) {
                err = PTR_ERR(batch);
                goto err_unpin;
        }

        /*
         * Emit the two workaround batch buffers, recording the offset from the
         * start of the workaround batch buffer object for each and their
         * respective sizes.
         */
        batch_ptr = batch;
        for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
                wa_bb[i]->offset = batch_ptr - batch;
                if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
                                                  CACHELINE_BYTES))) {
                        err = -EINVAL;
                        break;
                }
                if (wa_bb_fn[i])
                        batch_ptr = wa_bb_fn[i](engine, batch_ptr);
                wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
        }
        GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE);

        __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch);
        __i915_gem_object_release_map(wa_ctx->vma->obj);

        /* Verify that we can handle failure to setup the wa_ctx */
        if (!err)
                err = i915_inject_probe_error(engine->i915, -ENODEV);

err_unpin:
        if (err)
                i915_vma_unpin(wa_ctx->vma);
err:
        if (err == -EDEADLK) {
                err = i915_gem_ww_ctx_backoff(&ww);
                if (!err)
                        goto retry;
        }
        i915_gem_ww_ctx_fini(&ww);

        if (err) {
                i915_vma_put(engine->wa_ctx.vma);

                /* Clear all flags to prevent further use */
                memset(wa_ctx, 0, sizeof(*wa_ctx));
        }
}

static void st_runtime_underflow(struct intel_context_stats *stats, s32 dt)
{
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
        stats->runtime.num_underflow++;
        stats->runtime.max_underflow =
                max_t(u32, stats->runtime.max_underflow, -dt);
#endif
}

static u32 lrc_get_runtime(const struct intel_context *ce)
{
        /*
         * We can use either ppHWSP[16] which is recorded before the context
         * switch (and so excludes the cost of context switches) or use the
         * value from the context image itself, which is saved/restored earlier
         * and so includes the cost of the save.
         */
        return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
}

void lrc_update_runtime(struct intel_context *ce)
{
        struct intel_context_stats *stats = &ce->stats;
        u32 old;
        s32 dt;

        old = stats->runtime.last;
        stats->runtime.last = lrc_get_runtime(ce);
        dt = stats->runtime.last - old;
        if (!dt)
                return;

        if (unlikely(dt < 0)) {
                CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
                         old, stats->runtime.last, dt);
                st_runtime_underflow(stats, dt);
                return;
        }

        ewma_runtime_add(&stats->runtime.avg, dt);
        stats->runtime.total += dt;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif