Linux Audio

Check our new training course

Loading...
v3.1
   1/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
   2 *
   3 * Copyright (C) The Weather Channel, Inc.  2002.
   4 * Copyright (C) 2004 Nicolai Haehnle.
   5 * All Rights Reserved.
   6 *
   7 * The Weather Channel (TM) funded Tungsten Graphics to develop the
   8 * initial release of the Radeon 8500 driver under the XFree86 license.
   9 * This notice must be preserved.
  10 *
  11 * Permission is hereby granted, free of charge, to any person obtaining a
  12 * copy of this software and associated documentation files (the "Software"),
  13 * to deal in the Software without restriction, including without limitation
  14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  15 * and/or sell copies of the Software, and to permit persons to whom the
  16 * Software is furnished to do so, subject to the following conditions:
  17 *
  18 * The above copyright notice and this permission notice (including the next
  19 * paragraph) shall be included in all copies or substantial portions of the
  20 * Software.
  21 *
  22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  25 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  28 * DEALINGS IN THE SOFTWARE.
  29 *
  30 * Authors:
  31 *    Nicolai Haehnle <prefect_@gmx.net>
 
 
  32 */
  33
  34#include "drmP.h"
  35#include "drm.h"
  36#include "drm_buffer.h"
  37#include "radeon_drm.h"
  38#include "radeon_drv.h"
  39#include "r300_reg.h"
  40
  41#include <asm/unaligned.h>
  42
  43#define R300_SIMULTANEOUS_CLIPRECTS		4
  44
  45/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
  46 */
  47static const int r300_cliprect_cntl[4] = {
  48	0xAAAA,
  49	0xEEEE,
  50	0xFEFE,
  51	0xFFFE
  52};
  53
  54/**
  55 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
  56 * buffer, starting with index n.
  57 */
  58static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
  59			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
  60{
  61	struct drm_clip_rect box;
  62	int nr;
  63	int i;
  64	RING_LOCALS;
  65
  66	nr = cmdbuf->nbox - n;
  67	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
  68		nr = R300_SIMULTANEOUS_CLIPRECTS;
  69
  70	DRM_DEBUG("%i cliprects\n", nr);
  71
  72	if (nr) {
  73		BEGIN_RING(6 + nr * 2);
  74		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
  75
  76		for (i = 0; i < nr; ++i) {
  77			if (DRM_COPY_FROM_USER_UNCHECKED
  78			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
  79				DRM_ERROR("copy cliprect faulted\n");
  80				return -EFAULT;
  81			}
  82
  83			box.x2--; /* Hardware expects inclusive bottom-right corner */
  84			box.y2--;
  85
  86			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
  87				box.x1 = (box.x1) &
  88					R300_CLIPRECT_MASK;
  89				box.y1 = (box.y1) &
  90					R300_CLIPRECT_MASK;
  91				box.x2 = (box.x2) &
  92					R300_CLIPRECT_MASK;
  93				box.y2 = (box.y2) &
  94					R300_CLIPRECT_MASK;
  95			} else {
  96				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
  97					R300_CLIPRECT_MASK;
  98				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
  99					R300_CLIPRECT_MASK;
 100				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
 101					R300_CLIPRECT_MASK;
 102				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
 103					R300_CLIPRECT_MASK;
 104			}
 105
 106			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
 107				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
 108			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
 109				 (box.y2 << R300_CLIPRECT_Y_SHIFT));
 110
 111		}
 112
 113		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
 114
 115		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
 116		 * client might be able to trample over memory.
 117		 * The impact should be very limited, but I'd rather be safe than
 118		 * sorry.
 119		 */
 120		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
 121		OUT_RING(0);
 122		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
 123		ADVANCE_RING();
 124	} else {
 125		/* Why we allow zero cliprect rendering:
 126		 * There are some commands in a command buffer that must be submitted
 127		 * even when there are no cliprects, e.g. DMA buffer discard
 128		 * or state setting (though state setting could be avoided by
 129		 * simulating a loss of context).
 130		 *
 131		 * Now since the cmdbuf interface is so chaotic right now (and is
 132		 * bound to remain that way for a bit until things settle down),
 133		 * it is basically impossible to filter out the commands that are
 134		 * necessary and those that aren't.
 135		 *
 136		 * So I choose the safe way and don't do any filtering at all;
 137		 * instead, I simply set up the engine so that all rendering
 138		 * can't produce any fragments.
 139		 */
 140		BEGIN_RING(2);
 141		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
 142		ADVANCE_RING();
 143	}
 144
 145	/* flus cache and wait idle clean after cliprect change */
 146	BEGIN_RING(2);
 147	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 148	OUT_RING(R300_RB3D_DC_FLUSH);
 149	ADVANCE_RING();
 150	BEGIN_RING(2);
 151	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 152	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 153	ADVANCE_RING();
 154	/* set flush flag */
 155	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 156
 157	return 0;
 158}
 159
 160static u8 r300_reg_flags[0x10000 >> 2];
 161
 162void r300_init_reg_flags(struct drm_device *dev)
 163{
 164	int i;
 165	drm_radeon_private_t *dev_priv = dev->dev_private;
 166
 167	memset(r300_reg_flags, 0, 0x10000 >> 2);
 168#define ADD_RANGE_MARK(reg, count,mark) \
 169		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
 170			r300_reg_flags[i]|=(mark);
 171
 172#define MARK_SAFE		1
 173#define MARK_CHECK_OFFSET	2
 174
 175#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
 176
 177	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
 178	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
 179	ADD_RANGE(R300_VAP_CNTL, 1);
 180	ADD_RANGE(R300_SE_VTE_CNTL, 2);
 181	ADD_RANGE(0x2134, 2);
 182	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
 183	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
 184	ADD_RANGE(0x21DC, 1);
 185	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
 186	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
 187	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
 188	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
 189	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
 190	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
 191	ADD_RANGE(R300_GB_ENABLE, 1);
 192	ADD_RANGE(R300_GB_MSPOS0, 5);
 193	ADD_RANGE(R300_TX_INVALTAGS, 1);
 194	ADD_RANGE(R300_TX_ENABLE, 1);
 195	ADD_RANGE(0x4200, 4);
 196	ADD_RANGE(0x4214, 1);
 197	ADD_RANGE(R300_RE_POINTSIZE, 1);
 198	ADD_RANGE(0x4230, 3);
 199	ADD_RANGE(R300_RE_LINE_CNT, 1);
 200	ADD_RANGE(R300_RE_UNK4238, 1);
 201	ADD_RANGE(0x4260, 3);
 202	ADD_RANGE(R300_RE_SHADE, 4);
 203	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
 204	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
 205	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
 206	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
 207	ADD_RANGE(R300_RE_CULL_CNTL, 1);
 208	ADD_RANGE(0x42C0, 2);
 209	ADD_RANGE(R300_RS_CNTL_0, 2);
 210
 211	ADD_RANGE(R300_SU_REG_DEST, 1);
 212	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
 213		ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
 214
 215	ADD_RANGE(R300_SC_HYPERZ, 2);
 216	ADD_RANGE(0x43E8, 1);
 217
 218	ADD_RANGE(0x46A4, 5);
 219
 220	ADD_RANGE(R300_RE_FOG_STATE, 1);
 221	ADD_RANGE(R300_FOG_COLOR_R, 3);
 222	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
 223	ADD_RANGE(0x4BD8, 1);
 224	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
 225	ADD_RANGE(0x4E00, 1);
 226	ADD_RANGE(R300_RB3D_CBLEND, 2);
 227	ADD_RANGE(R300_RB3D_COLORMASK, 1);
 228	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
 229	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
 230	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
 231	ADD_RANGE(0x4E50, 9);
 232	ADD_RANGE(0x4E88, 1);
 233	ADD_RANGE(0x4EA0, 2);
 234	ADD_RANGE(R300_ZB_CNTL, 3);
 235	ADD_RANGE(R300_ZB_FORMAT, 4);
 236	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
 237	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
 238	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
 239	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
 240	ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
 241
 242	ADD_RANGE(R300_TX_FILTER_0, 16);
 243	ADD_RANGE(R300_TX_FILTER1_0, 16);
 244	ADD_RANGE(R300_TX_SIZE_0, 16);
 245	ADD_RANGE(R300_TX_FORMAT_0, 16);
 246	ADD_RANGE(R300_TX_PITCH_0, 16);
 247	/* Texture offset is dangerous and needs more checking */
 248	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
 249	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
 250	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
 251
 252	/* Sporadic registers used as primitives are emitted */
 253	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
 254	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
 255	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
 256	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
 257
 258	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
 259		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
 260		ADD_RANGE(R500_US_CONFIG, 2);
 261		ADD_RANGE(R500_US_CODE_ADDR, 3);
 262		ADD_RANGE(R500_US_FC_CTRL, 1);
 263		ADD_RANGE(R500_RS_IP_0, 16);
 264		ADD_RANGE(R500_RS_INST_0, 16);
 265		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
 266		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
 267		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
 268	} else {
 269		ADD_RANGE(R300_PFS_CNTL_0, 3);
 270		ADD_RANGE(R300_PFS_NODE_0, 4);
 271		ADD_RANGE(R300_PFS_TEXI_0, 64);
 272		ADD_RANGE(R300_PFS_INSTR0_0, 64);
 273		ADD_RANGE(R300_PFS_INSTR1_0, 64);
 274		ADD_RANGE(R300_PFS_INSTR2_0, 64);
 275		ADD_RANGE(R300_PFS_INSTR3_0, 64);
 276		ADD_RANGE(R300_RS_INTERP_0, 8);
 277		ADD_RANGE(R300_RS_ROUTE_0, 8);
 278
 279	}
 280}
 281
 282static __inline__ int r300_check_range(unsigned reg, int count)
 283{
 284	int i;
 285	if (reg & ~0xffff)
 286		return -1;
 287	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
 288		if (r300_reg_flags[i] != MARK_SAFE)
 289			return 1;
 290	return 0;
 291}
 292
 293static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
 294							  dev_priv,
 295							  drm_radeon_kcmd_buffer_t
 296							  * cmdbuf,
 297							  drm_r300_cmd_header_t
 298							  header)
 299{
 300	int reg;
 301	int sz;
 302	int i;
 303	u32 *value;
 304	RING_LOCALS;
 305
 306	sz = header.packet0.count;
 307	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 308
 309	if ((sz > 64) || (sz < 0)) {
 310		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
 311			 reg, sz);
 312		return -EINVAL;
 313	}
 314
 315	for (i = 0; i < sz; i++) {
 316		switch (r300_reg_flags[(reg >> 2) + i]) {
 317		case MARK_SAFE:
 318			break;
 319		case MARK_CHECK_OFFSET:
 320			value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 321			if (!radeon_check_offset(dev_priv, *value)) {
 322				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
 323					 reg, sz);
 324				return -EINVAL;
 325			}
 326			break;
 327		default:
 328			DRM_ERROR("Register %04x failed check as flag=%02x\n",
 329				reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
 330			return -EINVAL;
 331		}
 332	}
 333
 334	BEGIN_RING(1 + sz);
 335	OUT_RING(CP_PACKET0(reg, sz - 1));
 336	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 337	ADVANCE_RING();
 338
 339	return 0;
 340}
 341
 342/**
 343 * Emits a packet0 setting arbitrary registers.
 344 * Called by r300_do_cp_cmdbuf.
 345 *
 346 * Note that checks are performed on contents and addresses of the registers
 347 */
 348static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
 349					drm_radeon_kcmd_buffer_t *cmdbuf,
 350					drm_r300_cmd_header_t header)
 351{
 352	int reg;
 353	int sz;
 354	RING_LOCALS;
 355
 356	sz = header.packet0.count;
 357	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 358
 359	if (!sz)
 360		return 0;
 361
 362	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 363		return -EINVAL;
 364
 365	if (reg + sz * 4 >= 0x10000) {
 366		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
 367			  sz);
 368		return -EINVAL;
 369	}
 370
 371	if (r300_check_range(reg, sz)) {
 372		/* go and check everything */
 373		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
 374							   header);
 375	}
 376	/* the rest of the data is safe to emit, whatever the values the user passed */
 377
 378	BEGIN_RING(1 + sz);
 379	OUT_RING(CP_PACKET0(reg, sz - 1));
 380	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 381	ADVANCE_RING();
 382
 383	return 0;
 384}
 385
 386/**
 387 * Uploads user-supplied vertex program instructions or parameters onto
 388 * the graphics card.
 389 * Called by r300_do_cp_cmdbuf.
 390 */
 391static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
 392				    drm_radeon_kcmd_buffer_t *cmdbuf,
 393				    drm_r300_cmd_header_t header)
 394{
 395	int sz;
 396	int addr;
 397	RING_LOCALS;
 398
 399	sz = header.vpu.count;
 400	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
 401
 402	if (!sz)
 403		return 0;
 404	if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
 405		return -EINVAL;
 406
 407	/* VAP is very sensitive so we purge cache before we program it
 408	 * and we also flush its state before & after */
 409	BEGIN_RING(6);
 410	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 411	OUT_RING(R300_RB3D_DC_FLUSH);
 412	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 413	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 414	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
 415	OUT_RING(0);
 416	ADVANCE_RING();
 417	/* set flush flag */
 418	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 419
 420	BEGIN_RING(3 + sz * 4);
 421	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
 422	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
 423	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
 424	ADVANCE_RING();
 425
 426	BEGIN_RING(2);
 427	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
 428	OUT_RING(0);
 429	ADVANCE_RING();
 430
 431	return 0;
 432}
 433
 434/**
 435 * Emit a clear packet from userspace.
 436 * Called by r300_emit_packet3.
 437 */
 438static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
 439				      drm_radeon_kcmd_buffer_t *cmdbuf)
 440{
 441	RING_LOCALS;
 442
 443	if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 444		return -EINVAL;
 445
 446	BEGIN_RING(10);
 447	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
 448	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
 449		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
 450	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
 451	ADVANCE_RING();
 452
 453	BEGIN_RING(4);
 454	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 455	OUT_RING(R300_RB3D_DC_FLUSH);
 456	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 457	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 458	ADVANCE_RING();
 459	/* set flush flag */
 460	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 461
 462	return 0;
 463}
 464
 465static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
 466					       drm_radeon_kcmd_buffer_t *cmdbuf,
 467					       u32 header)
 468{
 469	int count, i, k;
 470#define MAX_ARRAY_PACKET  64
 471	u32 *data;
 472	u32 narrays;
 473	RING_LOCALS;
 474
 475	count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 476
 477	if ((count + 1) > MAX_ARRAY_PACKET) {
 478		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
 479			  count);
 480		return -EINVAL;
 481	}
 482	/* carefully check packet contents */
 483
 484	/* We have already read the header so advance the buffer. */
 485	drm_buffer_advance(cmdbuf->buffer, 4);
 486
 487	narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 488	k = 0;
 489	i = 1;
 490	while ((k < narrays) && (i < (count + 1))) {
 491		i++;		/* skip attribute field */
 492		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 493		if (!radeon_check_offset(dev_priv, *data)) {
 494			DRM_ERROR
 495			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 496			     k, i);
 497			return -EINVAL;
 498		}
 499		k++;
 500		i++;
 501		if (k == narrays)
 502			break;
 503		/* have one more to process, they come in pairs */
 504		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 505		if (!radeon_check_offset(dev_priv, *data)) {
 506			DRM_ERROR
 507			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 508			     k, i);
 509			return -EINVAL;
 510		}
 511		k++;
 512		i++;
 513	}
 514	/* do the counts match what we expect ? */
 515	if ((k != narrays) || (i != (count + 1))) {
 516		DRM_ERROR
 517		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
 518		     k, i, narrays, count + 1);
 519		return -EINVAL;
 520	}
 521
 522	/* all clear, output packet */
 523
 524	BEGIN_RING(count + 2);
 525	OUT_RING(header);
 526	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
 527	ADVANCE_RING();
 528
 529	return 0;
 530}
 531
 532static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
 533					     drm_radeon_kcmd_buffer_t *cmdbuf)
 534{
 535	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 536	int count, ret;
 537	RING_LOCALS;
 538
 539
 540	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 541
 542	if (*cmd & 0x8000) {
 543		u32 offset;
 544		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 545		if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 546			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 547
 548			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
 549			offset = *cmd2 << 10;
 550			ret = !radeon_check_offset(dev_priv, offset);
 551			if (ret) {
 552				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
 553				return -EINVAL;
 554			}
 555		}
 556
 557		if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
 558		    (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 559			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
 560			offset = *cmd3 << 10;
 561			ret = !radeon_check_offset(dev_priv, offset);
 562			if (ret) {
 563				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
 564				return -EINVAL;
 565			}
 566
 567		}
 568	}
 569
 570	BEGIN_RING(count+2);
 571	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 572	ADVANCE_RING();
 573
 574	return 0;
 575}
 576
 577static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
 578					    drm_radeon_kcmd_buffer_t *cmdbuf)
 579{
 580	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 581	u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 582	int count;
 583	int expected_count;
 584	RING_LOCALS;
 585
 586	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 587
 588	expected_count = *cmd1 >> 16;
 589	if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
 590		expected_count = (expected_count+1)/2;
 591
 592	if (count && count != expected_count) {
 593		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
 594			count, expected_count);
 595		return -EINVAL;
 596	}
 597
 598	BEGIN_RING(count+2);
 599	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 600	ADVANCE_RING();
 601
 602	if (!count) {
 603		drm_r300_cmd_header_t stack_header, *header;
 604		u32 *cmd1, *cmd2, *cmd3;
 605
 606		if (drm_buffer_unprocessed(cmdbuf->buffer)
 607				< 4*4 + sizeof(stack_header)) {
 608			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
 609			return -EINVAL;
 610		}
 611
 612		header = drm_buffer_read_object(cmdbuf->buffer,
 613				sizeof(stack_header), &stack_header);
 614
 615		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 616		cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 617		cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
 618		cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
 619
 620		if (header->header.cmd_type != R300_CMD_PACKET3 ||
 621		    header->packet3.packet != R300_CMD_PACKET3_RAW ||
 622		    *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
 623			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
 624			return -EINVAL;
 625		}
 626
 627		if ((*cmd1 & 0x8000ffff) != 0x80000810) {
 628			DRM_ERROR("Invalid indx_buffer reg address %08X\n",
 629					*cmd1);
 630			return -EINVAL;
 631		}
 632		if (!radeon_check_offset(dev_priv, *cmd2)) {
 633			DRM_ERROR("Invalid indx_buffer offset is %08X\n",
 634					*cmd2);
 635			return -EINVAL;
 636		}
 637		if (*cmd3 != expected_count) {
 638			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
 639				*cmd3, expected_count);
 640			return -EINVAL;
 641		}
 642
 643		BEGIN_RING(4);
 644		OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
 645		ADVANCE_RING();
 646	}
 647
 648	return 0;
 649}
 650
 651static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
 652					    drm_radeon_kcmd_buffer_t *cmdbuf)
 653{
 654	u32 *header;
 655	int count;
 656	RING_LOCALS;
 657
 658	if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
 659		return -EINVAL;
 660
 661	/* Fixme !! This simply emits a packet without much checking.
 662	   We need to be smarter. */
 663
 664	/* obtain first word - actual packet3 header */
 665	header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 666
 667	/* Is it packet 3 ? */
 668	if ((*header >> 30) != 0x3) {
 669		DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
 670		return -EINVAL;
 671	}
 672
 673	count = (*header >> 16) & 0x3fff;
 674
 675	/* Check again now that we know how much data to expect */
 676	if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
 677		DRM_ERROR
 678		    ("Expected packet3 of length %d but have only %d bytes left\n",
 679		     (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
 680		return -EINVAL;
 681	}
 682
 683	/* Is it a packet type we know about ? */
 684	switch (*header & 0xff00) {
 685	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
 686		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);
 687
 688	case RADEON_CNTL_BITBLT_MULTI:
 689		return r300_emit_bitblt_multi(dev_priv, cmdbuf);
 690
 691	case RADEON_CP_INDX_BUFFER:
 692		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
 693		return -EINVAL;
 694	case RADEON_CP_3D_DRAW_IMMD_2:
 695		/* triggers drawing using in-packet vertex data */
 696	case RADEON_CP_3D_DRAW_VBUF_2:
 697		/* triggers drawing of vertex buffers setup elsewhere */
 698		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
 699					   RADEON_PURGE_EMITED);
 700		break;
 701	case RADEON_CP_3D_DRAW_INDX_2:
 702		/* triggers drawing using indices to vertex buffer */
 703		/* whenever we send vertex we clear flush & purge */
 704		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
 705					   RADEON_PURGE_EMITED);
 706		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
 707	case RADEON_WAIT_FOR_IDLE:
 708	case RADEON_CP_NOP:
 709		/* these packets are safe */
 710		break;
 711	default:
 712		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
 713		return -EINVAL;
 714	}
 715
 716	BEGIN_RING(count + 2);
 717	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 718	ADVANCE_RING();
 719
 720	return 0;
 721}
 722
 723/**
 724 * Emit a rendering packet3 from userspace.
 725 * Called by r300_do_cp_cmdbuf.
 726 */
 727static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 728					drm_radeon_kcmd_buffer_t *cmdbuf,
 729					drm_r300_cmd_header_t header)
 730{
 731	int n;
 732	int ret;
 733	int orig_iter = cmdbuf->buffer->iterator;
 734
 735	/* This is a do-while-loop so that we run the interior at least once,
 736	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
 737	 */
 738	n = 0;
 739	do {
 740		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
 741			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
 742			if (ret)
 743				return ret;
 744
 745			cmdbuf->buffer->iterator = orig_iter;
 746		}
 747
 748		switch (header.packet3.packet) {
 749		case R300_CMD_PACKET3_CLEAR:
 750			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
 751			ret = r300_emit_clear(dev_priv, cmdbuf);
 752			if (ret) {
 753				DRM_ERROR("r300_emit_clear failed\n");
 754				return ret;
 755			}
 756			break;
 757
 758		case R300_CMD_PACKET3_RAW:
 759			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
 760			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
 761			if (ret) {
 762				DRM_ERROR("r300_emit_raw_packet3 failed\n");
 763				return ret;
 764			}
 765			break;
 766
 767		default:
 768			DRM_ERROR("bad packet3 type %i at byte %d\n",
 769				  header.packet3.packet,
 770				  cmdbuf->buffer->iterator - (int)sizeof(header));
 771			return -EINVAL;
 772		}
 773
 774		n += R300_SIMULTANEOUS_CLIPRECTS;
 775	} while (n < cmdbuf->nbox);
 776
 777	return 0;
 778}
 779
 780/* Some of the R300 chips seem to be extremely touchy about the two registers
 781 * that are configured in r300_pacify.
 782 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 783 * sends a command buffer that contains only state setting commands and a
 784 * vertex program/parameter upload sequence, this will eventually lead to a
 785 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 786 * So we should take great care to *always* call r300_pacify before
 787 * *anything* 3D related, and again afterwards. This is what the
 788 * call bracket in r300_do_cp_cmdbuf is for.
 789 */
 790
 791/**
 792 * Emit the sequence to pacify R300.
 793 */
 794static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
 795{
 796	uint32_t cache_z, cache_3d, cache_2d;
 797	RING_LOCALS;
 798
 799	cache_z = R300_ZC_FLUSH;
 800	cache_2d = R300_RB2D_DC_FLUSH;
 801	cache_3d = R300_RB3D_DC_FLUSH;
 802	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
 803		/* we can purge, primitive where draw since last purge */
 804		cache_z |= R300_ZC_FREE;
 805		cache_2d |= R300_RB2D_DC_FREE;
 806		cache_3d |= R300_RB3D_DC_FREE;
 807	}
 808
 809	/* flush & purge zbuffer */
 810	BEGIN_RING(2);
 811	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
 812	OUT_RING(cache_z);
 813	ADVANCE_RING();
 814	/* flush & purge 3d */
 815	BEGIN_RING(2);
 816	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 817	OUT_RING(cache_3d);
 818	ADVANCE_RING();
 819	/* flush & purge texture */
 820	BEGIN_RING(2);
 821	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
 822	OUT_RING(0);
 823	ADVANCE_RING();
 824	/* FIXME: is this one really needed ? */
 825	BEGIN_RING(2);
 826	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
 827	OUT_RING(0);
 828	ADVANCE_RING();
 829	BEGIN_RING(2);
 830	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 831	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 832	ADVANCE_RING();
 833	/* flush & purge 2d through E2 as RB2D will trigger lockup */
 834	BEGIN_RING(4);
 835	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
 836	OUT_RING(cache_2d);
 837	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 838	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
 839		 RADEON_WAIT_HOST_IDLECLEAN);
 840	ADVANCE_RING();
 841	/* set flush & purge flags */
 842	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
 843}
 844
 845/**
 846 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 847 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 848 * be careful about how this function is called.
 849 */
 850static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 851{
 852	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 853	struct drm_radeon_master_private *master_priv = master->driver_priv;
 854
 855	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
 856	buf->pending = 1;
 857	buf->used = 0;
 858}
 859
 860static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
 861			  drm_r300_cmd_header_t header)
 862{
 863	u32 wait_until;
 864	RING_LOCALS;
 865
 866	if (!header.wait.flags)
 867		return;
 868
 869	wait_until = 0;
 870
 871	switch(header.wait.flags) {
 872	case R300_WAIT_2D:
 873		wait_until = RADEON_WAIT_2D_IDLE;
 874		break;
 875	case R300_WAIT_3D:
 876		wait_until = RADEON_WAIT_3D_IDLE;
 877		break;
 878	case R300_NEW_WAIT_2D_3D:
 879		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
 880		break;
 881	case R300_NEW_WAIT_2D_2D_CLEAN:
 882		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
 883		break;
 884	case R300_NEW_WAIT_3D_3D_CLEAN:
 885		wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
 886		break;
 887	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
 888		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
 889		wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
 890		break;
 891	default:
 892		return;
 893	}
 894
 895	BEGIN_RING(2);
 896	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 897	OUT_RING(wait_until);
 898	ADVANCE_RING();
 899}
 900
 901static int r300_scratch(drm_radeon_private_t *dev_priv,
 902			drm_radeon_kcmd_buffer_t *cmdbuf,
 903			drm_r300_cmd_header_t header)
 904{
 905	u32 *ref_age_base;
 906	u32 i, *buf_idx, h_pending;
 907	u64 *ptr_addr;
 908	u64 stack_ptr_addr;
 909	RING_LOCALS;
 910
 911	if (drm_buffer_unprocessed(cmdbuf->buffer) <
 912	    (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
 913		return -EINVAL;
 914	}
 915
 916	if (header.scratch.reg >= 5) {
 917		return -EINVAL;
 918	}
 919
 920	dev_priv->scratch_ages[header.scratch.reg]++;
 921
 922	ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
 923			sizeof(stack_ptr_addr), &stack_ptr_addr);
 924	ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);
 925
 926	for (i=0; i < header.scratch.n_bufs; i++) {
 927		buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 928		*buf_idx *= 2; /* 8 bytes per buf */
 929
 930		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx,
 931				&dev_priv->scratch_ages[header.scratch.reg],
 932				sizeof(u32)))
 933			return -EINVAL;
 934
 935		if (DRM_COPY_FROM_USER(&h_pending,
 936				ref_age_base + *buf_idx + 1,
 937				sizeof(u32)))
 938			return -EINVAL;
 939
 940		if (h_pending == 0)
 941			return -EINVAL;
 942
 943		h_pending--;
 944
 945		if (DRM_COPY_TO_USER(ref_age_base + *buf_idx + 1,
 946					&h_pending,
 947					sizeof(u32)))
 948			return -EINVAL;
 949
 950		drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
 951	}
 952
 953	BEGIN_RING(2);
 954	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
 955	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
 956	ADVANCE_RING();
 957
 958	return 0;
 959}
 960
 961/**
 962 * Uploads user-supplied vertex program instructions or parameters onto
 963 * the graphics card.
 964 * Called by r300_do_cp_cmdbuf.
 965 */
 966static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
 967				       drm_radeon_kcmd_buffer_t *cmdbuf,
 968				       drm_r300_cmd_header_t header)
 969{
 970	int sz;
 971	int addr;
 972	int type;
 973	int isclamp;
 974	int stride;
 975	RING_LOCALS;
 976
 977	sz = header.r500fp.count;
 978	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
 979	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
 980
 981	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
 982	isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
 983
 984	addr |= (type << 16);
 985	addr |= (isclamp << 17);
 986
 987	stride = type ? 4 : 6;
 988
 989	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
 990	if (!sz)
 991		return 0;
 992	if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 993		return -EINVAL;
 994
 995	BEGIN_RING(3 + sz * stride);
 996	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
 997	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
 998	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
 999
1000	ADVANCE_RING();
1001
1002	return 0;
1003}
1004
1005
1006/**
1007 * Parses and validates a user-supplied command buffer and emits appropriate
1008 * commands on the DMA ring buffer.
1009 * Called by the ioctl handler function radeon_cp_cmdbuf.
1010 */
1011int r300_do_cp_cmdbuf(struct drm_device *dev,
1012		      struct drm_file *file_priv,
1013		      drm_radeon_kcmd_buffer_t *cmdbuf)
1014{
1015	drm_radeon_private_t *dev_priv = dev->dev_private;
1016	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1017	struct drm_device_dma *dma = dev->dma;
1018	struct drm_buf *buf = NULL;
1019	int emit_dispatch_age = 0;
1020	int ret = 0;
1021
1022	DRM_DEBUG("\n");
1023
1024	/* pacify */
1025	r300_pacify(dev_priv);
1026
1027	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1028		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1029		if (ret)
1030			goto cleanup;
1031	}
1032
1033	while (drm_buffer_unprocessed(cmdbuf->buffer)
1034			>= sizeof(drm_r300_cmd_header_t)) {
1035		int idx;
1036		drm_r300_cmd_header_t *header, stack_header;
1037
1038		header = drm_buffer_read_object(cmdbuf->buffer,
1039				sizeof(stack_header), &stack_header);
1040
1041		switch (header->header.cmd_type) {
1042		case R300_CMD_PACKET0:
1043			DRM_DEBUG("R300_CMD_PACKET0\n");
1044			ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
1045			if (ret) {
1046				DRM_ERROR("r300_emit_packet0 failed\n");
1047				goto cleanup;
1048			}
1049			break;
1050
1051		case R300_CMD_VPU:
1052			DRM_DEBUG("R300_CMD_VPU\n");
1053			ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
1054			if (ret) {
1055				DRM_ERROR("r300_emit_vpu failed\n");
1056				goto cleanup;
1057			}
1058			break;
1059
1060		case R300_CMD_PACKET3:
1061			DRM_DEBUG("R300_CMD_PACKET3\n");
1062			ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
1063			if (ret) {
1064				DRM_ERROR("r300_emit_packet3 failed\n");
1065				goto cleanup;
1066			}
1067			break;
1068
1069		case R300_CMD_END3D:
1070			DRM_DEBUG("R300_CMD_END3D\n");
1071			/* TODO:
1072			   Ideally userspace driver should not need to issue this call,
1073			   i.e. the drm driver should issue it automatically and prevent
1074			   lockups.
1075
1076			   In practice, we do not understand why this call is needed and what
1077			   it does (except for some vague guesses that it has to do with cache
1078			   coherence) and so the user space driver does it.
1079
1080			   Once we are sure which uses prevent lockups the code could be moved
1081			   into the kernel and the userspace driver will not
1082			   need to use this command.
1083
1084			   Note that issuing this command does not hurt anything
1085			   except, possibly, performance */
1086			r300_pacify(dev_priv);
1087			break;
1088
1089		case R300_CMD_CP_DELAY:
1090			/* simple enough, we can do it here */
1091			DRM_DEBUG("R300_CMD_CP_DELAY\n");
1092			{
1093				int i;
1094				RING_LOCALS;
1095
1096				BEGIN_RING(header->delay.count);
1097				for (i = 0; i < header->delay.count; i++)
1098					OUT_RING(RADEON_CP_PACKET2);
1099				ADVANCE_RING();
1100			}
1101			break;
1102
1103		case R300_CMD_DMA_DISCARD:
1104			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1105			idx = header->dma.buf_idx;
1106			if (idx < 0 || idx >= dma->buf_count) {
1107				DRM_ERROR("buffer index %d (of %d max)\n",
1108					  idx, dma->buf_count - 1);
1109				ret = -EINVAL;
1110				goto cleanup;
1111			}
1112
1113			buf = dma->buflist[idx];
1114			if (buf->file_priv != file_priv || buf->pending) {
1115				DRM_ERROR("bad buffer %p %p %d\n",
1116					  buf->file_priv, file_priv,
1117					  buf->pending);
1118				ret = -EINVAL;
1119				goto cleanup;
1120			}
1121
1122			emit_dispatch_age = 1;
1123			r300_discard_buffer(dev, file_priv->master, buf);
1124			break;
1125
1126		case R300_CMD_WAIT:
1127			DRM_DEBUG("R300_CMD_WAIT\n");
1128			r300_cmd_wait(dev_priv, *header);
1129			break;
1130
1131		case R300_CMD_SCRATCH:
1132			DRM_DEBUG("R300_CMD_SCRATCH\n");
1133			ret = r300_scratch(dev_priv, cmdbuf, *header);
1134			if (ret) {
1135				DRM_ERROR("r300_scratch failed\n");
1136				goto cleanup;
1137			}
1138			break;
1139
1140		case R300_CMD_R500FP:
1141			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1142				DRM_ERROR("Calling r500 command on r300 card\n");
1143				ret = -EINVAL;
1144				goto cleanup;
1145			}
1146			DRM_DEBUG("R300_CMD_R500FP\n");
1147			ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
1148			if (ret) {
1149				DRM_ERROR("r300_emit_r500fp failed\n");
1150				goto cleanup;
1151			}
1152			break;
1153		default:
1154			DRM_ERROR("bad cmd_type %i at byte %d\n",
1155				  header->header.cmd_type,
1156				  cmdbuf->buffer->iterator - (int)sizeof(*header));
1157			ret = -EINVAL;
1158			goto cleanup;
1159		}
1160	}
1161
1162	DRM_DEBUG("END\n");
1163
1164      cleanup:
1165	r300_pacify(dev_priv);
1166
1167	/* We emit the vertex buffer age here, outside the pacifier "brackets"
1168	 * for two reasons:
1169	 *  (1) This may coalesce multiple age emissions into a single one and
1170	 *  (2) more importantly, some chips lock up hard when scratch registers
1171	 *      are written inside the pacifier bracket.
1172	 */
1173	if (emit_dispatch_age) {
1174		RING_LOCALS;
1175
1176		/* Emit the vertex buffer age */
1177		BEGIN_RING(2);
1178		RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
1179		ADVANCE_RING();
1180	}
1181
1182	COMMIT_RING();
1183
1184	return ret;
1185}
v3.15
   1/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
   2 *
   3 * Copyright (C) The Weather Channel, Inc.  2002.
   4 * Copyright (C) 2004 Nicolai Haehnle.
   5 * All Rights Reserved.
   6 *
   7 * The Weather Channel (TM) funded Tungsten Graphics to develop the
   8 * initial release of the Radeon 8500 driver under the XFree86 license.
   9 * This notice must be preserved.
  10 *
  11 * Permission is hereby granted, free of charge, to any person obtaining a
  12 * copy of this software and associated documentation files (the "Software"),
  13 * to deal in the Software without restriction, including without limitation
  14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  15 * and/or sell copies of the Software, and to permit persons to whom the
  16 * Software is furnished to do so, subject to the following conditions:
  17 *
  18 * The above copyright notice and this permission notice (including the next
  19 * paragraph) shall be included in all copies or substantial portions of the
  20 * Software.
  21 *
  22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  25 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  26 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  27 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  28 * DEALINGS IN THE SOFTWARE.
  29 *
  30 * Authors:
  31 *    Nicolai Haehnle <prefect_@gmx.net>
  32 *
  33 * ------------------------ This file is DEPRECATED! -------------------------
  34 */
  35
  36#include <drm/drmP.h>
  37#include <drm/drm_buffer.h>
  38#include <drm/radeon_drm.h>
 
  39#include "radeon_drv.h"
  40#include "r300_reg.h"
  41
  42#include <asm/unaligned.h>
  43
  44#define R300_SIMULTANEOUS_CLIPRECTS		4
  45
  46/* Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects
  47 */
  48static const int r300_cliprect_cntl[4] = {
  49	0xAAAA,
  50	0xEEEE,
  51	0xFEFE,
  52	0xFFFE
  53};
  54
  55/**
  56 * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
  57 * buffer, starting with index n.
  58 */
  59static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
  60			       drm_radeon_kcmd_buffer_t *cmdbuf, int n)
  61{
  62	struct drm_clip_rect box;
  63	int nr;
  64	int i;
  65	RING_LOCALS;
  66
  67	nr = cmdbuf->nbox - n;
  68	if (nr > R300_SIMULTANEOUS_CLIPRECTS)
  69		nr = R300_SIMULTANEOUS_CLIPRECTS;
  70
  71	DRM_DEBUG("%i cliprects\n", nr);
  72
  73	if (nr) {
  74		BEGIN_RING(6 + nr * 2);
  75		OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
  76
  77		for (i = 0; i < nr; ++i) {
  78			if (copy_from_user
  79			    (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
  80				DRM_ERROR("copy cliprect faulted\n");
  81				return -EFAULT;
  82			}
  83
  84			box.x2--; /* Hardware expects inclusive bottom-right corner */
  85			box.y2--;
  86
  87			if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
  88				box.x1 = (box.x1) &
  89					R300_CLIPRECT_MASK;
  90				box.y1 = (box.y1) &
  91					R300_CLIPRECT_MASK;
  92				box.x2 = (box.x2) &
  93					R300_CLIPRECT_MASK;
  94				box.y2 = (box.y2) &
  95					R300_CLIPRECT_MASK;
  96			} else {
  97				box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
  98					R300_CLIPRECT_MASK;
  99				box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
 100					R300_CLIPRECT_MASK;
 101				box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
 102					R300_CLIPRECT_MASK;
 103				box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
 104					R300_CLIPRECT_MASK;
 105			}
 106
 107			OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
 108				 (box.y1 << R300_CLIPRECT_Y_SHIFT));
 109			OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
 110				 (box.y2 << R300_CLIPRECT_Y_SHIFT));
 111
 112		}
 113
 114		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
 115
 116		/* TODO/SECURITY: Force scissors to a safe value, otherwise the
 117		 * client might be able to trample over memory.
 118		 * The impact should be very limited, but I'd rather be safe than
 119		 * sorry.
 120		 */
 121		OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
 122		OUT_RING(0);
 123		OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
 124		ADVANCE_RING();
 125	} else {
 126		/* Why we allow zero cliprect rendering:
 127		 * There are some commands in a command buffer that must be submitted
 128		 * even when there are no cliprects, e.g. DMA buffer discard
 129		 * or state setting (though state setting could be avoided by
 130		 * simulating a loss of context).
 131		 *
 132		 * Now since the cmdbuf interface is so chaotic right now (and is
 133		 * bound to remain that way for a bit until things settle down),
 134		 * it is basically impossible to filter out the commands that are
 135		 * necessary and those that aren't.
 136		 *
 137		 * So I choose the safe way and don't do any filtering at all;
 138		 * instead, I simply set up the engine so that all rendering
 139		 * can't produce any fragments.
 140		 */
 141		BEGIN_RING(2);
 142		OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
 143		ADVANCE_RING();
 144	}
 145
 146	/* flus cache and wait idle clean after cliprect change */
 147	BEGIN_RING(2);
 148	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 149	OUT_RING(R300_RB3D_DC_FLUSH);
 150	ADVANCE_RING();
 151	BEGIN_RING(2);
 152	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 153	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 154	ADVANCE_RING();
 155	/* set flush flag */
 156	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 157
 158	return 0;
 159}
 160
 161static u8 r300_reg_flags[0x10000 >> 2];
 162
 163void r300_init_reg_flags(struct drm_device *dev)
 164{
 165	int i;
 166	drm_radeon_private_t *dev_priv = dev->dev_private;
 167
 168	memset(r300_reg_flags, 0, 0x10000 >> 2);
 169#define ADD_RANGE_MARK(reg, count,mark) \
 170		for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
 171			r300_reg_flags[i]|=(mark);
 172
 173#define MARK_SAFE		1
 174#define MARK_CHECK_OFFSET	2
 175
 176#define ADD_RANGE(reg, count)	ADD_RANGE_MARK(reg, count, MARK_SAFE)
 177
 178	/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
 179	ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
 180	ADD_RANGE(R300_VAP_CNTL, 1);
 181	ADD_RANGE(R300_SE_VTE_CNTL, 2);
 182	ADD_RANGE(0x2134, 2);
 183	ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
 184	ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
 185	ADD_RANGE(0x21DC, 1);
 186	ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
 187	ADD_RANGE(R300_VAP_CLIP_X_0, 4);
 188	ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
 189	ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
 190	ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
 191	ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
 192	ADD_RANGE(R300_GB_ENABLE, 1);
 193	ADD_RANGE(R300_GB_MSPOS0, 5);
 194	ADD_RANGE(R300_TX_INVALTAGS, 1);
 195	ADD_RANGE(R300_TX_ENABLE, 1);
 196	ADD_RANGE(0x4200, 4);
 197	ADD_RANGE(0x4214, 1);
 198	ADD_RANGE(R300_RE_POINTSIZE, 1);
 199	ADD_RANGE(0x4230, 3);
 200	ADD_RANGE(R300_RE_LINE_CNT, 1);
 201	ADD_RANGE(R300_RE_UNK4238, 1);
 202	ADD_RANGE(0x4260, 3);
 203	ADD_RANGE(R300_RE_SHADE, 4);
 204	ADD_RANGE(R300_RE_POLYGON_MODE, 5);
 205	ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
 206	ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
 207	ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
 208	ADD_RANGE(R300_RE_CULL_CNTL, 1);
 209	ADD_RANGE(0x42C0, 2);
 210	ADD_RANGE(R300_RS_CNTL_0, 2);
 211
 212	ADD_RANGE(R300_SU_REG_DEST, 1);
 213	if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530)
 214		ADD_RANGE(RV530_FG_ZBREG_DEST, 1);
 215
 216	ADD_RANGE(R300_SC_HYPERZ, 2);
 217	ADD_RANGE(0x43E8, 1);
 218
 219	ADD_RANGE(0x46A4, 5);
 220
 221	ADD_RANGE(R300_RE_FOG_STATE, 1);
 222	ADD_RANGE(R300_FOG_COLOR_R, 3);
 223	ADD_RANGE(R300_PP_ALPHA_TEST, 2);
 224	ADD_RANGE(0x4BD8, 1);
 225	ADD_RANGE(R300_PFS_PARAM_0_X, 64);
 226	ADD_RANGE(0x4E00, 1);
 227	ADD_RANGE(R300_RB3D_CBLEND, 2);
 228	ADD_RANGE(R300_RB3D_COLORMASK, 1);
 229	ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
 230	ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);	/* check offset */
 231	ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
 232	ADD_RANGE(0x4E50, 9);
 233	ADD_RANGE(0x4E88, 1);
 234	ADD_RANGE(0x4EA0, 2);
 235	ADD_RANGE(R300_ZB_CNTL, 3);
 236	ADD_RANGE(R300_ZB_FORMAT, 4);
 237	ADD_RANGE_MARK(R300_ZB_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);	/* check offset */
 238	ADD_RANGE(R300_ZB_DEPTHPITCH, 1);
 239	ADD_RANGE(R300_ZB_DEPTHCLEARVALUE, 1);
 240	ADD_RANGE(R300_ZB_ZMASK_OFFSET, 13);
 241	ADD_RANGE(R300_ZB_ZPASS_DATA, 2); /* ZB_ZPASS_DATA, ZB_ZPASS_ADDR */
 242
 243	ADD_RANGE(R300_TX_FILTER_0, 16);
 244	ADD_RANGE(R300_TX_FILTER1_0, 16);
 245	ADD_RANGE(R300_TX_SIZE_0, 16);
 246	ADD_RANGE(R300_TX_FORMAT_0, 16);
 247	ADD_RANGE(R300_TX_PITCH_0, 16);
 248	/* Texture offset is dangerous and needs more checking */
 249	ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
 250	ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
 251	ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
 252
 253	/* Sporadic registers used as primitives are emitted */
 254	ADD_RANGE(R300_ZB_ZCACHE_CTLSTAT, 1);
 255	ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
 256	ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
 257	ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
 258
 259	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
 260		ADD_RANGE(R500_VAP_INDEX_OFFSET, 1);
 261		ADD_RANGE(R500_US_CONFIG, 2);
 262		ADD_RANGE(R500_US_CODE_ADDR, 3);
 263		ADD_RANGE(R500_US_FC_CTRL, 1);
 264		ADD_RANGE(R500_RS_IP_0, 16);
 265		ADD_RANGE(R500_RS_INST_0, 16);
 266		ADD_RANGE(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2);
 267		ADD_RANGE(R500_RB3D_CONSTANT_COLOR_AR, 2);
 268		ADD_RANGE(R500_ZB_FIFO_SIZE, 2);
 269	} else {
 270		ADD_RANGE(R300_PFS_CNTL_0, 3);
 271		ADD_RANGE(R300_PFS_NODE_0, 4);
 272		ADD_RANGE(R300_PFS_TEXI_0, 64);
 273		ADD_RANGE(R300_PFS_INSTR0_0, 64);
 274		ADD_RANGE(R300_PFS_INSTR1_0, 64);
 275		ADD_RANGE(R300_PFS_INSTR2_0, 64);
 276		ADD_RANGE(R300_PFS_INSTR3_0, 64);
 277		ADD_RANGE(R300_RS_INTERP_0, 8);
 278		ADD_RANGE(R300_RS_ROUTE_0, 8);
 279
 280	}
 281}
 282
 283static __inline__ int r300_check_range(unsigned reg, int count)
 284{
 285	int i;
 286	if (reg & ~0xffff)
 287		return -1;
 288	for (i = (reg >> 2); i < (reg >> 2) + count; i++)
 289		if (r300_reg_flags[i] != MARK_SAFE)
 290			return 1;
 291	return 0;
 292}
 293
 294static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
 295							  dev_priv,
 296							  drm_radeon_kcmd_buffer_t
 297							  * cmdbuf,
 298							  drm_r300_cmd_header_t
 299							  header)
 300{
 301	int reg;
 302	int sz;
 303	int i;
 304	u32 *value;
 305	RING_LOCALS;
 306
 307	sz = header.packet0.count;
 308	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 309
 310	if ((sz > 64) || (sz < 0)) {
 311		DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
 312			 reg, sz);
 313		return -EINVAL;
 314	}
 315
 316	for (i = 0; i < sz; i++) {
 317		switch (r300_reg_flags[(reg >> 2) + i]) {
 318		case MARK_SAFE:
 319			break;
 320		case MARK_CHECK_OFFSET:
 321			value = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 322			if (!radeon_check_offset(dev_priv, *value)) {
 323				DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n",
 324					 reg, sz);
 325				return -EINVAL;
 326			}
 327			break;
 328		default:
 329			DRM_ERROR("Register %04x failed check as flag=%02x\n",
 330				reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
 331			return -EINVAL;
 332		}
 333	}
 334
 335	BEGIN_RING(1 + sz);
 336	OUT_RING(CP_PACKET0(reg, sz - 1));
 337	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 338	ADVANCE_RING();
 339
 340	return 0;
 341}
 342
 343/**
 344 * Emits a packet0 setting arbitrary registers.
 345 * Called by r300_do_cp_cmdbuf.
 346 *
 347 * Note that checks are performed on contents and addresses of the registers
 348 */
 349static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
 350					drm_radeon_kcmd_buffer_t *cmdbuf,
 351					drm_r300_cmd_header_t header)
 352{
 353	int reg;
 354	int sz;
 355	RING_LOCALS;
 356
 357	sz = header.packet0.count;
 358	reg = (header.packet0.reghi << 8) | header.packet0.reglo;
 359
 360	if (!sz)
 361		return 0;
 362
 363	if (sz * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 364		return -EINVAL;
 365
 366	if (reg + sz * 4 >= 0x10000) {
 367		DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
 368			  sz);
 369		return -EINVAL;
 370	}
 371
 372	if (r300_check_range(reg, sz)) {
 373		/* go and check everything */
 374		return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
 375							   header);
 376	}
 377	/* the rest of the data is safe to emit, whatever the values the user passed */
 378
 379	BEGIN_RING(1 + sz);
 380	OUT_RING(CP_PACKET0(reg, sz - 1));
 381	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz);
 382	ADVANCE_RING();
 383
 384	return 0;
 385}
 386
 387/**
 388 * Uploads user-supplied vertex program instructions or parameters onto
 389 * the graphics card.
 390 * Called by r300_do_cp_cmdbuf.
 391 */
 392static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
 393				    drm_radeon_kcmd_buffer_t *cmdbuf,
 394				    drm_r300_cmd_header_t header)
 395{
 396	int sz;
 397	int addr;
 398	RING_LOCALS;
 399
 400	sz = header.vpu.count;
 401	addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
 402
 403	if (!sz)
 404		return 0;
 405	if (sz * 16 > drm_buffer_unprocessed(cmdbuf->buffer))
 406		return -EINVAL;
 407
 408	/* VAP is very sensitive so we purge cache before we program it
 409	 * and we also flush its state before & after */
 410	BEGIN_RING(6);
 411	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 412	OUT_RING(R300_RB3D_DC_FLUSH);
 413	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 414	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 415	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
 416	OUT_RING(0);
 417	ADVANCE_RING();
 418	/* set flush flag */
 419	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 420
 421	BEGIN_RING(3 + sz * 4);
 422	OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
 423	OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
 424	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * 4);
 425	ADVANCE_RING();
 426
 427	BEGIN_RING(2);
 428	OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
 429	OUT_RING(0);
 430	ADVANCE_RING();
 431
 432	return 0;
 433}
 434
 435/**
 436 * Emit a clear packet from userspace.
 437 * Called by r300_emit_packet3.
 438 */
 439static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
 440				      drm_radeon_kcmd_buffer_t *cmdbuf)
 441{
 442	RING_LOCALS;
 443
 444	if (8 * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 445		return -EINVAL;
 446
 447	BEGIN_RING(10);
 448	OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
 449	OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
 450		 (1 << R300_PRIM_NUM_VERTICES_SHIFT));
 451	OUT_RING_DRM_BUFFER(cmdbuf->buffer, 8);
 452	ADVANCE_RING();
 453
 454	BEGIN_RING(4);
 455	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 456	OUT_RING(R300_RB3D_DC_FLUSH);
 457	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 458	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 459	ADVANCE_RING();
 460	/* set flush flag */
 461	dev_priv->track_flush |= RADEON_FLUSH_EMITED;
 462
 463	return 0;
 464}
 465
 466static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
 467					       drm_radeon_kcmd_buffer_t *cmdbuf,
 468					       u32 header)
 469{
 470	int count, i, k;
 471#define MAX_ARRAY_PACKET  64
 472	u32 *data;
 473	u32 narrays;
 474	RING_LOCALS;
 475
 476	count = (header & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 477
 478	if ((count + 1) > MAX_ARRAY_PACKET) {
 479		DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
 480			  count);
 481		return -EINVAL;
 482	}
 483	/* carefully check packet contents */
 484
 485	/* We have already read the header so advance the buffer. */
 486	drm_buffer_advance(cmdbuf->buffer, 4);
 487
 488	narrays = *(u32 *)drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 489	k = 0;
 490	i = 1;
 491	while ((k < narrays) && (i < (count + 1))) {
 492		i++;		/* skip attribute field */
 493		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 494		if (!radeon_check_offset(dev_priv, *data)) {
 495			DRM_ERROR
 496			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 497			     k, i);
 498			return -EINVAL;
 499		}
 500		k++;
 501		i++;
 502		if (k == narrays)
 503			break;
 504		/* have one more to process, they come in pairs */
 505		data = drm_buffer_pointer_to_dword(cmdbuf->buffer, i);
 506		if (!radeon_check_offset(dev_priv, *data)) {
 507			DRM_ERROR
 508			    ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
 509			     k, i);
 510			return -EINVAL;
 511		}
 512		k++;
 513		i++;
 514	}
 515	/* do the counts match what we expect ? */
 516	if ((k != narrays) || (i != (count + 1))) {
 517		DRM_ERROR
 518		    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
 519		     k, i, narrays, count + 1);
 520		return -EINVAL;
 521	}
 522
 523	/* all clear, output packet */
 524
 525	BEGIN_RING(count + 2);
 526	OUT_RING(header);
 527	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 1);
 528	ADVANCE_RING();
 529
 530	return 0;
 531}
 532
 533static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
 534					     drm_radeon_kcmd_buffer_t *cmdbuf)
 535{
 536	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 537	int count, ret;
 538	RING_LOCALS;
 539
 540
 541	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 542
 543	if (*cmd & 0x8000) {
 544		u32 offset;
 545		u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 546		if (*cmd1 & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 547			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 548
 549			u32 *cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
 550			offset = *cmd2 << 10;
 551			ret = !radeon_check_offset(dev_priv, offset);
 552			if (ret) {
 553				DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
 554				return -EINVAL;
 555			}
 556		}
 557
 558		if ((*cmd1 & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
 559		    (*cmd1 & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 560			u32 *cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
 561			offset = *cmd3 << 10;
 562			ret = !radeon_check_offset(dev_priv, offset);
 563			if (ret) {
 564				DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
 565				return -EINVAL;
 566			}
 567
 568		}
 569	}
 570
 571	BEGIN_RING(count+2);
 572	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 573	ADVANCE_RING();
 574
 575	return 0;
 576}
 577
 578static __inline__ int r300_emit_draw_indx_2(drm_radeon_private_t *dev_priv,
 579					    drm_radeon_kcmd_buffer_t *cmdbuf)
 580{
 581	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 582	u32 *cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 583	int count;
 584	int expected_count;
 585	RING_LOCALS;
 586
 587	count = (*cmd & RADEON_CP_PACKET_COUNT_MASK) >> 16;
 588
 589	expected_count = *cmd1 >> 16;
 590	if (!(*cmd1 & R300_VAP_VF_CNTL__INDEX_SIZE_32bit))
 591		expected_count = (expected_count+1)/2;
 592
 593	if (count && count != expected_count) {
 594		DRM_ERROR("3D_DRAW_INDX_2: packet size %i, expected %i\n",
 595			count, expected_count);
 596		return -EINVAL;
 597	}
 598
 599	BEGIN_RING(count+2);
 600	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 601	ADVANCE_RING();
 602
 603	if (!count) {
 604		drm_r300_cmd_header_t stack_header, *header;
 605		u32 *cmd1, *cmd2, *cmd3;
 606
 607		if (drm_buffer_unprocessed(cmdbuf->buffer)
 608				< 4*4 + sizeof(stack_header)) {
 609			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER, but stream is too short.\n");
 610			return -EINVAL;
 611		}
 612
 613		header = drm_buffer_read_object(cmdbuf->buffer,
 614				sizeof(stack_header), &stack_header);
 615
 616		cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 617		cmd1 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 1);
 618		cmd2 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 2);
 619		cmd3 = drm_buffer_pointer_to_dword(cmdbuf->buffer, 3);
 620
 621		if (header->header.cmd_type != R300_CMD_PACKET3 ||
 622		    header->packet3.packet != R300_CMD_PACKET3_RAW ||
 623		    *cmd != CP_PACKET3(RADEON_CP_INDX_BUFFER, 2)) {
 624			DRM_ERROR("3D_DRAW_INDX_2: expect subsequent INDX_BUFFER.\n");
 625			return -EINVAL;
 626		}
 627
 628		if ((*cmd1 & 0x8000ffff) != 0x80000810) {
 629			DRM_ERROR("Invalid indx_buffer reg address %08X\n",
 630					*cmd1);
 631			return -EINVAL;
 632		}
 633		if (!radeon_check_offset(dev_priv, *cmd2)) {
 634			DRM_ERROR("Invalid indx_buffer offset is %08X\n",
 635					*cmd2);
 636			return -EINVAL;
 637		}
 638		if (*cmd3 != expected_count) {
 639			DRM_ERROR("INDX_BUFFER: buffer size %i, expected %i\n",
 640				*cmd3, expected_count);
 641			return -EINVAL;
 642		}
 643
 644		BEGIN_RING(4);
 645		OUT_RING_DRM_BUFFER(cmdbuf->buffer, 4);
 646		ADVANCE_RING();
 647	}
 648
 649	return 0;
 650}
 651
 652static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
 653					    drm_radeon_kcmd_buffer_t *cmdbuf)
 654{
 655	u32 *header;
 656	int count;
 657	RING_LOCALS;
 658
 659	if (4 > drm_buffer_unprocessed(cmdbuf->buffer))
 660		return -EINVAL;
 661
 662	/* Fixme !! This simply emits a packet without much checking.
 663	   We need to be smarter. */
 664
 665	/* obtain first word - actual packet3 header */
 666	header = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 667
 668	/* Is it packet 3 ? */
 669	if ((*header >> 30) != 0x3) {
 670		DRM_ERROR("Not a packet3 header (0x%08x)\n", *header);
 671		return -EINVAL;
 672	}
 673
 674	count = (*header >> 16) & 0x3fff;
 675
 676	/* Check again now that we know how much data to expect */
 677	if ((count + 2) * 4 > drm_buffer_unprocessed(cmdbuf->buffer)) {
 678		DRM_ERROR
 679		    ("Expected packet3 of length %d but have only %d bytes left\n",
 680		     (count + 2) * 4, drm_buffer_unprocessed(cmdbuf->buffer));
 681		return -EINVAL;
 682	}
 683
 684	/* Is it a packet type we know about ? */
 685	switch (*header & 0xff00) {
 686	case RADEON_3D_LOAD_VBPNTR:	/* load vertex array pointers */
 687		return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, *header);
 688
 689	case RADEON_CNTL_BITBLT_MULTI:
 690		return r300_emit_bitblt_multi(dev_priv, cmdbuf);
 691
 692	case RADEON_CP_INDX_BUFFER:
 693		DRM_ERROR("packet3 INDX_BUFFER without preceding 3D_DRAW_INDX_2 is illegal.\n");
 694		return -EINVAL;
 695	case RADEON_CP_3D_DRAW_IMMD_2:
 696		/* triggers drawing using in-packet vertex data */
 697	case RADEON_CP_3D_DRAW_VBUF_2:
 698		/* triggers drawing of vertex buffers setup elsewhere */
 699		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
 700					   RADEON_PURGE_EMITED);
 701		break;
 702	case RADEON_CP_3D_DRAW_INDX_2:
 703		/* triggers drawing using indices to vertex buffer */
 704		/* whenever we send vertex we clear flush & purge */
 705		dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
 706					   RADEON_PURGE_EMITED);
 707		return r300_emit_draw_indx_2(dev_priv, cmdbuf);
 708	case RADEON_WAIT_FOR_IDLE:
 709	case RADEON_CP_NOP:
 710		/* these packets are safe */
 711		break;
 712	default:
 713		DRM_ERROR("Unknown packet3 header (0x%08x)\n", *header);
 714		return -EINVAL;
 715	}
 716
 717	BEGIN_RING(count + 2);
 718	OUT_RING_DRM_BUFFER(cmdbuf->buffer, count + 2);
 719	ADVANCE_RING();
 720
 721	return 0;
 722}
 723
 724/**
 725 * Emit a rendering packet3 from userspace.
 726 * Called by r300_do_cp_cmdbuf.
 727 */
 728static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 729					drm_radeon_kcmd_buffer_t *cmdbuf,
 730					drm_r300_cmd_header_t header)
 731{
 732	int n;
 733	int ret;
 734	int orig_iter = cmdbuf->buffer->iterator;
 735
 736	/* This is a do-while-loop so that we run the interior at least once,
 737	 * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
 738	 */
 739	n = 0;
 740	do {
 741		if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
 742			ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
 743			if (ret)
 744				return ret;
 745
 746			cmdbuf->buffer->iterator = orig_iter;
 747		}
 748
 749		switch (header.packet3.packet) {
 750		case R300_CMD_PACKET3_CLEAR:
 751			DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
 752			ret = r300_emit_clear(dev_priv, cmdbuf);
 753			if (ret) {
 754				DRM_ERROR("r300_emit_clear failed\n");
 755				return ret;
 756			}
 757			break;
 758
 759		case R300_CMD_PACKET3_RAW:
 760			DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
 761			ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
 762			if (ret) {
 763				DRM_ERROR("r300_emit_raw_packet3 failed\n");
 764				return ret;
 765			}
 766			break;
 767
 768		default:
 769			DRM_ERROR("bad packet3 type %i at byte %d\n",
 770				  header.packet3.packet,
 771				  cmdbuf->buffer->iterator - (int)sizeof(header));
 772			return -EINVAL;
 773		}
 774
 775		n += R300_SIMULTANEOUS_CLIPRECTS;
 776	} while (n < cmdbuf->nbox);
 777
 778	return 0;
 779}
 780
 781/* Some of the R300 chips seem to be extremely touchy about the two registers
 782 * that are configured in r300_pacify.
 783 * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
 784 * sends a command buffer that contains only state setting commands and a
 785 * vertex program/parameter upload sequence, this will eventually lead to a
 786 * lockup, unless the sequence is bracketed by calls to r300_pacify.
 787 * So we should take great care to *always* call r300_pacify before
 788 * *anything* 3D related, and again afterwards. This is what the
 789 * call bracket in r300_do_cp_cmdbuf is for.
 790 */
 791
 792/**
 793 * Emit the sequence to pacify R300.
 794 */
 795static void r300_pacify(drm_radeon_private_t *dev_priv)
 796{
 797	uint32_t cache_z, cache_3d, cache_2d;
 798	RING_LOCALS;
 799
 800	cache_z = R300_ZC_FLUSH;
 801	cache_2d = R300_RB2D_DC_FLUSH;
 802	cache_3d = R300_RB3D_DC_FLUSH;
 803	if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
 804		/* we can purge, primitive where draw since last purge */
 805		cache_z |= R300_ZC_FREE;
 806		cache_2d |= R300_RB2D_DC_FREE;
 807		cache_3d |= R300_RB3D_DC_FREE;
 808	}
 809
 810	/* flush & purge zbuffer */
 811	BEGIN_RING(2);
 812	OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
 813	OUT_RING(cache_z);
 814	ADVANCE_RING();
 815	/* flush & purge 3d */
 816	BEGIN_RING(2);
 817	OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
 818	OUT_RING(cache_3d);
 819	ADVANCE_RING();
 820	/* flush & purge texture */
 821	BEGIN_RING(2);
 822	OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
 823	OUT_RING(0);
 824	ADVANCE_RING();
 825	/* FIXME: is this one really needed ? */
 826	BEGIN_RING(2);
 827	OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
 828	OUT_RING(0);
 829	ADVANCE_RING();
 830	BEGIN_RING(2);
 831	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 832	OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
 833	ADVANCE_RING();
 834	/* flush & purge 2d through E2 as RB2D will trigger lockup */
 835	BEGIN_RING(4);
 836	OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
 837	OUT_RING(cache_2d);
 838	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 839	OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
 840		 RADEON_WAIT_HOST_IDLECLEAN);
 841	ADVANCE_RING();
 842	/* set flush & purge flags */
 843	dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
 844}
 845
 846/**
 847 * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
 848 * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
 849 * be careful about how this function is called.
 850 */
 851static void r300_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 852{
 853	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 854	struct drm_radeon_master_private *master_priv = master->driver_priv;
 855
 856	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
 857	buf->pending = 1;
 858	buf->used = 0;
 859}
 860
 861static void r300_cmd_wait(drm_radeon_private_t * dev_priv,
 862			  drm_r300_cmd_header_t header)
 863{
 864	u32 wait_until;
 865	RING_LOCALS;
 866
 867	if (!header.wait.flags)
 868		return;
 869
 870	wait_until = 0;
 871
 872	switch(header.wait.flags) {
 873	case R300_WAIT_2D:
 874		wait_until = RADEON_WAIT_2D_IDLE;
 875		break;
 876	case R300_WAIT_3D:
 877		wait_until = RADEON_WAIT_3D_IDLE;
 878		break;
 879	case R300_NEW_WAIT_2D_3D:
 880		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_3D_IDLE;
 881		break;
 882	case R300_NEW_WAIT_2D_2D_CLEAN:
 883		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
 884		break;
 885	case R300_NEW_WAIT_3D_3D_CLEAN:
 886		wait_until = RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
 887		break;
 888	case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
 889		wait_until = RADEON_WAIT_2D_IDLE|RADEON_WAIT_2D_IDLECLEAN;
 890		wait_until |= RADEON_WAIT_3D_IDLE|RADEON_WAIT_3D_IDLECLEAN;
 891		break;
 892	default:
 893		return;
 894	}
 895
 896	BEGIN_RING(2);
 897	OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
 898	OUT_RING(wait_until);
 899	ADVANCE_RING();
 900}
 901
 902static int r300_scratch(drm_radeon_private_t *dev_priv,
 903			drm_radeon_kcmd_buffer_t *cmdbuf,
 904			drm_r300_cmd_header_t header)
 905{
 906	u32 *ref_age_base;
 907	u32 i, *buf_idx, h_pending;
 908	u64 *ptr_addr;
 909	u64 stack_ptr_addr;
 910	RING_LOCALS;
 911
 912	if (drm_buffer_unprocessed(cmdbuf->buffer) <
 913	    (sizeof(u64) + header.scratch.n_bufs * sizeof(*buf_idx))) {
 914		return -EINVAL;
 915	}
 916
 917	if (header.scratch.reg >= 5) {
 918		return -EINVAL;
 919	}
 920
 921	dev_priv->scratch_ages[header.scratch.reg]++;
 922
 923	ptr_addr = drm_buffer_read_object(cmdbuf->buffer,
 924			sizeof(stack_ptr_addr), &stack_ptr_addr);
 925	ref_age_base = (u32 *)(unsigned long)get_unaligned(ptr_addr);
 926
 927	for (i=0; i < header.scratch.n_bufs; i++) {
 928		buf_idx = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 929		*buf_idx *= 2; /* 8 bytes per buf */
 930
 931		if (copy_to_user(ref_age_base + *buf_idx,
 932				&dev_priv->scratch_ages[header.scratch.reg],
 933				sizeof(u32)))
 934			return -EINVAL;
 935
 936		if (copy_from_user(&h_pending,
 937				ref_age_base + *buf_idx + 1,
 938				sizeof(u32)))
 939			return -EINVAL;
 940
 941		if (h_pending == 0)
 942			return -EINVAL;
 943
 944		h_pending--;
 945
 946		if (copy_to_user(ref_age_base + *buf_idx + 1,
 947					&h_pending,
 948					sizeof(u32)))
 949			return -EINVAL;
 950
 951		drm_buffer_advance(cmdbuf->buffer, sizeof(*buf_idx));
 952	}
 953
 954	BEGIN_RING(2);
 955	OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
 956	OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
 957	ADVANCE_RING();
 958
 959	return 0;
 960}
 961
 962/**
 963 * Uploads user-supplied vertex program instructions or parameters onto
 964 * the graphics card.
 965 * Called by r300_do_cp_cmdbuf.
 966 */
 967static inline int r300_emit_r500fp(drm_radeon_private_t *dev_priv,
 968				       drm_radeon_kcmd_buffer_t *cmdbuf,
 969				       drm_r300_cmd_header_t header)
 970{
 971	int sz;
 972	int addr;
 973	int type;
 974	int isclamp;
 975	int stride;
 976	RING_LOCALS;
 977
 978	sz = header.r500fp.count;
 979	/* address is 9 bits 0 - 8, bit 1 of flags is part of address */
 980	addr = ((header.r500fp.adrhi_flags & 1) << 8) | header.r500fp.adrlo;
 981
 982	type = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
 983	isclamp = !!(header.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
 984
 985	addr |= (type << 16);
 986	addr |= (isclamp << 17);
 987
 988	stride = type ? 4 : 6;
 989
 990	DRM_DEBUG("r500fp %d %d type: %d\n", sz, addr, type);
 991	if (!sz)
 992		return 0;
 993	if (sz * stride * 4 > drm_buffer_unprocessed(cmdbuf->buffer))
 994		return -EINVAL;
 995
 996	BEGIN_RING(3 + sz * stride);
 997	OUT_RING_REG(R500_GA_US_VECTOR_INDEX, addr);
 998	OUT_RING(CP_PACKET0_TABLE(R500_GA_US_VECTOR_DATA, sz * stride - 1));
 999	OUT_RING_DRM_BUFFER(cmdbuf->buffer, sz * stride);
1000
1001	ADVANCE_RING();
1002
1003	return 0;
1004}
1005
1006
1007/**
1008 * Parses and validates a user-supplied command buffer and emits appropriate
1009 * commands on the DMA ring buffer.
1010 * Called by the ioctl handler function radeon_cp_cmdbuf.
1011 */
1012int r300_do_cp_cmdbuf(struct drm_device *dev,
1013		      struct drm_file *file_priv,
1014		      drm_radeon_kcmd_buffer_t *cmdbuf)
1015{
1016	drm_radeon_private_t *dev_priv = dev->dev_private;
1017	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
1018	struct drm_device_dma *dma = dev->dma;
1019	struct drm_buf *buf = NULL;
1020	int emit_dispatch_age = 0;
1021	int ret = 0;
1022
1023	DRM_DEBUG("\n");
1024
1025	/* pacify */
1026	r300_pacify(dev_priv);
1027
1028	if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
1029		ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
1030		if (ret)
1031			goto cleanup;
1032	}
1033
1034	while (drm_buffer_unprocessed(cmdbuf->buffer)
1035			>= sizeof(drm_r300_cmd_header_t)) {
1036		int idx;
1037		drm_r300_cmd_header_t *header, stack_header;
1038
1039		header = drm_buffer_read_object(cmdbuf->buffer,
1040				sizeof(stack_header), &stack_header);
1041
1042		switch (header->header.cmd_type) {
1043		case R300_CMD_PACKET0:
1044			DRM_DEBUG("R300_CMD_PACKET0\n");
1045			ret = r300_emit_packet0(dev_priv, cmdbuf, *header);
1046			if (ret) {
1047				DRM_ERROR("r300_emit_packet0 failed\n");
1048				goto cleanup;
1049			}
1050			break;
1051
1052		case R300_CMD_VPU:
1053			DRM_DEBUG("R300_CMD_VPU\n");
1054			ret = r300_emit_vpu(dev_priv, cmdbuf, *header);
1055			if (ret) {
1056				DRM_ERROR("r300_emit_vpu failed\n");
1057				goto cleanup;
1058			}
1059			break;
1060
1061		case R300_CMD_PACKET3:
1062			DRM_DEBUG("R300_CMD_PACKET3\n");
1063			ret = r300_emit_packet3(dev_priv, cmdbuf, *header);
1064			if (ret) {
1065				DRM_ERROR("r300_emit_packet3 failed\n");
1066				goto cleanup;
1067			}
1068			break;
1069
1070		case R300_CMD_END3D:
1071			DRM_DEBUG("R300_CMD_END3D\n");
1072			/* TODO:
1073			   Ideally userspace driver should not need to issue this call,
1074			   i.e. the drm driver should issue it automatically and prevent
1075			   lockups.
1076
1077			   In practice, we do not understand why this call is needed and what
1078			   it does (except for some vague guesses that it has to do with cache
1079			   coherence) and so the user space driver does it.
1080
1081			   Once we are sure which uses prevent lockups the code could be moved
1082			   into the kernel and the userspace driver will not
1083			   need to use this command.
1084
1085			   Note that issuing this command does not hurt anything
1086			   except, possibly, performance */
1087			r300_pacify(dev_priv);
1088			break;
1089
1090		case R300_CMD_CP_DELAY:
1091			/* simple enough, we can do it here */
1092			DRM_DEBUG("R300_CMD_CP_DELAY\n");
1093			{
1094				int i;
1095				RING_LOCALS;
1096
1097				BEGIN_RING(header->delay.count);
1098				for (i = 0; i < header->delay.count; i++)
1099					OUT_RING(RADEON_CP_PACKET2);
1100				ADVANCE_RING();
1101			}
1102			break;
1103
1104		case R300_CMD_DMA_DISCARD:
1105			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
1106			idx = header->dma.buf_idx;
1107			if (idx < 0 || idx >= dma->buf_count) {
1108				DRM_ERROR("buffer index %d (of %d max)\n",
1109					  idx, dma->buf_count - 1);
1110				ret = -EINVAL;
1111				goto cleanup;
1112			}
1113
1114			buf = dma->buflist[idx];
1115			if (buf->file_priv != file_priv || buf->pending) {
1116				DRM_ERROR("bad buffer %p %p %d\n",
1117					  buf->file_priv, file_priv,
1118					  buf->pending);
1119				ret = -EINVAL;
1120				goto cleanup;
1121			}
1122
1123			emit_dispatch_age = 1;
1124			r300_discard_buffer(dev, file_priv->master, buf);
1125			break;
1126
1127		case R300_CMD_WAIT:
1128			DRM_DEBUG("R300_CMD_WAIT\n");
1129			r300_cmd_wait(dev_priv, *header);
1130			break;
1131
1132		case R300_CMD_SCRATCH:
1133			DRM_DEBUG("R300_CMD_SCRATCH\n");
1134			ret = r300_scratch(dev_priv, cmdbuf, *header);
1135			if (ret) {
1136				DRM_ERROR("r300_scratch failed\n");
1137				goto cleanup;
1138			}
1139			break;
1140
1141		case R300_CMD_R500FP:
1142			if ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV515) {
1143				DRM_ERROR("Calling r500 command on r300 card\n");
1144				ret = -EINVAL;
1145				goto cleanup;
1146			}
1147			DRM_DEBUG("R300_CMD_R500FP\n");
1148			ret = r300_emit_r500fp(dev_priv, cmdbuf, *header);
1149			if (ret) {
1150				DRM_ERROR("r300_emit_r500fp failed\n");
1151				goto cleanup;
1152			}
1153			break;
1154		default:
1155			DRM_ERROR("bad cmd_type %i at byte %d\n",
1156				  header->header.cmd_type,
1157				  cmdbuf->buffer->iterator - (int)sizeof(*header));
1158			ret = -EINVAL;
1159			goto cleanup;
1160		}
1161	}
1162
1163	DRM_DEBUG("END\n");
1164
1165      cleanup:
1166	r300_pacify(dev_priv);
1167
1168	/* We emit the vertex buffer age here, outside the pacifier "brackets"
1169	 * for two reasons:
1170	 *  (1) This may coalesce multiple age emissions into a single one and
1171	 *  (2) more importantly, some chips lock up hard when scratch registers
1172	 *      are written inside the pacifier bracket.
1173	 */
1174	if (emit_dispatch_age) {
1175		RING_LOCALS;
1176
1177		/* Emit the vertex buffer age */
1178		BEGIN_RING(2);
1179		RADEON_DISPATCH_AGE(master_priv->sarea_priv->last_dispatch);
1180		ADVANCE_RING();
1181	}
1182
1183	COMMIT_RING();
1184
1185	return ret;
1186}