Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.2.
  1/*
  2 * Copyright 2009 Advanced Micro Devices, Inc.
  3 *
  4 * Permission is hereby granted, free of charge, to any person obtaining a
  5 * copy of this software and associated documentation files (the "Software"),
  6 * to deal in the Software without restriction, including without limitation
  7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8 * and/or sell copies of the Software, and to permit persons to whom the
  9 * Software is furnished to do so, subject to the following conditions:
 10 *
 11 * The above copyright notice and this permission notice (including the next
 12 * paragraph) shall be included in all copies or substantial portions of the
 13 * Software.
 14 *
 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 18 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 21 * DEALINGS IN THE SOFTWARE.
 22 *
 23 * Authors:
 24 *     Alex Deucher <alexander.deucher@amd.com>
 25 */
 26#include "drmP.h"
 27#include "drm.h"
 28#include "radeon_drm.h"
 29#include "radeon_drv.h"
 30
 31#include "r600_blit_shaders.h"
 32
 33#define DI_PT_RECTLIST        0x11
 34#define DI_INDEX_SIZE_16_BIT  0x0
 35#define DI_SRC_SEL_AUTO_INDEX 0x2
 36
 37#define FMT_8                 0x1
 38#define FMT_5_6_5             0x8
 39#define FMT_8_8_8_8           0x1a
 40#define COLOR_8               0x1
 41#define COLOR_5_6_5           0x8
 42#define COLOR_8_8_8_8         0x1a
 43
 44static inline void
 45set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
 46{
 47	u32 cb_color_info;
 48	int pitch, slice;
 49	RING_LOCALS;
 50	DRM_DEBUG("\n");
 51
 52	h = ALIGN(h, 8);
 53	if (h < 8)
 54		h = 8;
 55
 56	cb_color_info = ((format << 2) | (1 << 27));
 57	pitch = (w / 8) - 1;
 58	slice = ((w * h) / 64) - 1;
 59
 60	if (((dev_priv->flags & RADEON_FAMILY_MASK) > CHIP_R600) &&
 61	    ((dev_priv->flags & RADEON_FAMILY_MASK) < CHIP_RV770)) {
 62		BEGIN_RING(21 + 2);
 63		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 64		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 65		OUT_RING(gpu_addr >> 8);
 66		OUT_RING(CP_PACKET3(R600_IT_SURFACE_BASE_UPDATE, 0));
 67		OUT_RING(2 << 0);
 68	} else {
 69		BEGIN_RING(21);
 70		OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 71		OUT_RING((R600_CB_COLOR0_BASE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 72		OUT_RING(gpu_addr >> 8);
 73	}
 74
 75	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 76	OUT_RING((R600_CB_COLOR0_SIZE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 77	OUT_RING((pitch << 0) | (slice << 10));
 78
 79	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 80	OUT_RING((R600_CB_COLOR0_VIEW - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 81	OUT_RING(0);
 82
 83	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 84	OUT_RING((R600_CB_COLOR0_INFO - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 85	OUT_RING(cb_color_info);
 86
 87	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 88	OUT_RING((R600_CB_COLOR0_TILE - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 89	OUT_RING(0);
 90
 91	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 92	OUT_RING((R600_CB_COLOR0_FRAG - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 93	OUT_RING(0);
 94
 95	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
 96	OUT_RING((R600_CB_COLOR0_MASK - R600_SET_CONTEXT_REG_OFFSET) >> 2);
 97	OUT_RING(0);
 98
 99	ADVANCE_RING();
100}
101
102static inline void
103cp_set_surface_sync(drm_radeon_private_t *dev_priv,
104		    u32 sync_type, u32 size, u64 mc_addr)
105{
106	u32 cp_coher_size;
107	RING_LOCALS;
108	DRM_DEBUG("\n");
109
110	if (size == 0xffffffff)
111		cp_coher_size = 0xffffffff;
112	else
113		cp_coher_size = ((size + 255) >> 8);
114
115	BEGIN_RING(5);
116	OUT_RING(CP_PACKET3(R600_IT_SURFACE_SYNC, 3));
117	OUT_RING(sync_type);
118	OUT_RING(cp_coher_size);
119	OUT_RING((mc_addr >> 8));
120	OUT_RING(10); /* poll interval */
121	ADVANCE_RING();
122}
123
124static inline void
125set_shaders(struct drm_device *dev)
126{
127	drm_radeon_private_t *dev_priv = dev->dev_private;
128	u64 gpu_addr;
129	int i;
130	u32 *vs, *ps;
131	uint32_t sq_pgm_resources;
132	RING_LOCALS;
133	DRM_DEBUG("\n");
134
135	/* load shaders */
136	vs = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset);
137	ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
138
139	for (i = 0; i < r6xx_vs_size; i++)
140		vs[i] = cpu_to_le32(r6xx_vs[i]);
141	for (i = 0; i < r6xx_ps_size; i++)
142		ps[i] = cpu_to_le32(r6xx_ps[i]);
143
144	dev_priv->blit_vb->used = 512;
145
146	gpu_addr = dev_priv->gart_buffers_offset + dev_priv->blit_vb->offset;
147
148	/* setup shader regs */
149	sq_pgm_resources = (1 << 0);
150
151	BEGIN_RING(9 + 12);
152	/* VS */
153	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
154	OUT_RING((R600_SQ_PGM_START_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
155	OUT_RING(gpu_addr >> 8);
156
157	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
158	OUT_RING((R600_SQ_PGM_RESOURCES_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
159	OUT_RING(sq_pgm_resources);
160
161	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
162	OUT_RING((R600_SQ_PGM_CF_OFFSET_VS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
163	OUT_RING(0);
164
165	/* PS */
166	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
167	OUT_RING((R600_SQ_PGM_START_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
168	OUT_RING((gpu_addr + 256) >> 8);
169
170	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
171	OUT_RING((R600_SQ_PGM_RESOURCES_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
172	OUT_RING(sq_pgm_resources | (1 << 28));
173
174	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
175	OUT_RING((R600_SQ_PGM_EXPORTS_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
176	OUT_RING(2);
177
178	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 1));
179	OUT_RING((R600_SQ_PGM_CF_OFFSET_PS - R600_SET_CONTEXT_REG_OFFSET) >> 2);
180	OUT_RING(0);
181	ADVANCE_RING();
182
183	cp_set_surface_sync(dev_priv,
184			    R600_SH_ACTION_ENA, 512, gpu_addr);
185}
186
187static inline void
188set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
189{
190	uint32_t sq_vtx_constant_word2;
191	RING_LOCALS;
192	DRM_DEBUG("\n");
193
194	sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
195#ifdef __BIG_ENDIAN
196	sq_vtx_constant_word2 |= (2 << 30);
197#endif
198
199	BEGIN_RING(9);
200	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
201	OUT_RING(0x460);
202	OUT_RING(gpu_addr & 0xffffffff);
203	OUT_RING(48 - 1);
204	OUT_RING(sq_vtx_constant_word2);
205	OUT_RING(1 << 0);
206	OUT_RING(0);
207	OUT_RING(0);
208	OUT_RING(R600_SQ_TEX_VTX_VALID_BUFFER << 30);
209	ADVANCE_RING();
210
211	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
212	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
213	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
214	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
215	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
216		cp_set_surface_sync(dev_priv,
217				    R600_TC_ACTION_ENA, 48, gpu_addr);
218	else
219		cp_set_surface_sync(dev_priv,
220				    R600_VC_ACTION_ENA, 48, gpu_addr);
221}
222
223static inline void
224set_tex_resource(drm_radeon_private_t *dev_priv,
225		 int format, int w, int h, int pitch, u64 gpu_addr)
226{
227	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
228	RING_LOCALS;
229	DRM_DEBUG("\n");
230
231	if (h < 1)
232		h = 1;
233
234	sq_tex_resource_word0 = (1 << 0);
235	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
236				  ((w - 1) << 19));
237
238	sq_tex_resource_word1 = (format << 26);
239	sq_tex_resource_word1 |= ((h - 1) << 0);
240
241	sq_tex_resource_word4 = ((1 << 14) |
242				 (0 << 16) |
243				 (1 << 19) |
244				 (2 << 22) |
245				 (3 << 25));
246
247	BEGIN_RING(9);
248	OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
249	OUT_RING(0);
250	OUT_RING(sq_tex_resource_word0);
251	OUT_RING(sq_tex_resource_word1);
252	OUT_RING(gpu_addr >> 8);
253	OUT_RING(gpu_addr >> 8);
254	OUT_RING(sq_tex_resource_word4);
255	OUT_RING(0);
256	OUT_RING(R600_SQ_TEX_VTX_VALID_TEXTURE << 30);
257	ADVANCE_RING();
258
259}
260
261static inline void
262set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
263{
264	RING_LOCALS;
265	DRM_DEBUG("\n");
266
267	BEGIN_RING(12);
268	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
269	OUT_RING((R600_PA_SC_SCREEN_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
270	OUT_RING((x1 << 0) | (y1 << 16));
271	OUT_RING((x2 << 0) | (y2 << 16));
272
273	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
274	OUT_RING((R600_PA_SC_GENERIC_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
275	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
276	OUT_RING((x2 << 0) | (y2 << 16));
277
278	OUT_RING(CP_PACKET3(R600_IT_SET_CONTEXT_REG, 2));
279	OUT_RING((R600_PA_SC_WINDOW_SCISSOR_TL - R600_SET_CONTEXT_REG_OFFSET) >> 2);
280	OUT_RING((x1 << 0) | (y1 << 16) | (1 << 31));
281	OUT_RING((x2 << 0) | (y2 << 16));
282	ADVANCE_RING();
283}
284
285static inline void
286draw_auto(drm_radeon_private_t *dev_priv)
287{
288	RING_LOCALS;
289	DRM_DEBUG("\n");
290
291	BEGIN_RING(10);
292	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
293	OUT_RING((R600_VGT_PRIMITIVE_TYPE - R600_SET_CONFIG_REG_OFFSET) >> 2);
294	OUT_RING(DI_PT_RECTLIST);
295
296	OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
297#ifdef __BIG_ENDIAN
298	OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
299#else
300	OUT_RING(DI_INDEX_SIZE_16_BIT);
301#endif
302
303	OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
304	OUT_RING(1);
305
306	OUT_RING(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
307	OUT_RING(3);
308	OUT_RING(DI_SRC_SEL_AUTO_INDEX);
309
310	ADVANCE_RING();
311	COMMIT_RING();
312}
313
314static inline void
315set_default_state(drm_radeon_private_t *dev_priv)
316{
317	int i;
318	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
319	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
320	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
321	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
322	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
323	RING_LOCALS;
324
325	switch ((dev_priv->flags & RADEON_FAMILY_MASK)) {
326	case CHIP_R600:
327		num_ps_gprs = 192;
328		num_vs_gprs = 56;
329		num_temp_gprs = 4;
330		num_gs_gprs = 0;
331		num_es_gprs = 0;
332		num_ps_threads = 136;
333		num_vs_threads = 48;
334		num_gs_threads = 4;
335		num_es_threads = 4;
336		num_ps_stack_entries = 128;
337		num_vs_stack_entries = 128;
338		num_gs_stack_entries = 0;
339		num_es_stack_entries = 0;
340		break;
341	case CHIP_RV630:
342	case CHIP_RV635:
343		num_ps_gprs = 84;
344		num_vs_gprs = 36;
345		num_temp_gprs = 4;
346		num_gs_gprs = 0;
347		num_es_gprs = 0;
348		num_ps_threads = 144;
349		num_vs_threads = 40;
350		num_gs_threads = 4;
351		num_es_threads = 4;
352		num_ps_stack_entries = 40;
353		num_vs_stack_entries = 40;
354		num_gs_stack_entries = 32;
355		num_es_stack_entries = 16;
356		break;
357	case CHIP_RV610:
358	case CHIP_RV620:
359	case CHIP_RS780:
360	case CHIP_RS880:
361	default:
362		num_ps_gprs = 84;
363		num_vs_gprs = 36;
364		num_temp_gprs = 4;
365		num_gs_gprs = 0;
366		num_es_gprs = 0;
367		num_ps_threads = 136;
368		num_vs_threads = 48;
369		num_gs_threads = 4;
370		num_es_threads = 4;
371		num_ps_stack_entries = 40;
372		num_vs_stack_entries = 40;
373		num_gs_stack_entries = 32;
374		num_es_stack_entries = 16;
375		break;
376	case CHIP_RV670:
377		num_ps_gprs = 144;
378		num_vs_gprs = 40;
379		num_temp_gprs = 4;
380		num_gs_gprs = 0;
381		num_es_gprs = 0;
382		num_ps_threads = 136;
383		num_vs_threads = 48;
384		num_gs_threads = 4;
385		num_es_threads = 4;
386		num_ps_stack_entries = 40;
387		num_vs_stack_entries = 40;
388		num_gs_stack_entries = 32;
389		num_es_stack_entries = 16;
390		break;
391	case CHIP_RV770:
392		num_ps_gprs = 192;
393		num_vs_gprs = 56;
394		num_temp_gprs = 4;
395		num_gs_gprs = 0;
396		num_es_gprs = 0;
397		num_ps_threads = 188;
398		num_vs_threads = 60;
399		num_gs_threads = 0;
400		num_es_threads = 0;
401		num_ps_stack_entries = 256;
402		num_vs_stack_entries = 256;
403		num_gs_stack_entries = 0;
404		num_es_stack_entries = 0;
405		break;
406	case CHIP_RV730:
407	case CHIP_RV740:
408		num_ps_gprs = 84;
409		num_vs_gprs = 36;
410		num_temp_gprs = 4;
411		num_gs_gprs = 0;
412		num_es_gprs = 0;
413		num_ps_threads = 188;
414		num_vs_threads = 60;
415		num_gs_threads = 0;
416		num_es_threads = 0;
417		num_ps_stack_entries = 128;
418		num_vs_stack_entries = 128;
419		num_gs_stack_entries = 0;
420		num_es_stack_entries = 0;
421		break;
422	case CHIP_RV710:
423		num_ps_gprs = 192;
424		num_vs_gprs = 56;
425		num_temp_gprs = 4;
426		num_gs_gprs = 0;
427		num_es_gprs = 0;
428		num_ps_threads = 144;
429		num_vs_threads = 48;
430		num_gs_threads = 0;
431		num_es_threads = 0;
432		num_ps_stack_entries = 128;
433		num_vs_stack_entries = 128;
434		num_gs_stack_entries = 0;
435		num_es_stack_entries = 0;
436		break;
437	}
438
439	if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) ||
440	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) ||
441	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) ||
442	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880) ||
443	    ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV710))
444		sq_config = 0;
445	else
446		sq_config = R600_VC_ENABLE;
447
448	sq_config |= (R600_DX9_CONSTS |
449		      R600_ALU_INST_PREFER_VECTOR |
450		      R600_PS_PRIO(0) |
451		      R600_VS_PRIO(1) |
452		      R600_GS_PRIO(2) |
453		      R600_ES_PRIO(3));
454
455	sq_gpr_resource_mgmt_1 = (R600_NUM_PS_GPRS(num_ps_gprs) |
456				  R600_NUM_VS_GPRS(num_vs_gprs) |
457				  R600_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
458	sq_gpr_resource_mgmt_2 = (R600_NUM_GS_GPRS(num_gs_gprs) |
459				  R600_NUM_ES_GPRS(num_es_gprs));
460	sq_thread_resource_mgmt = (R600_NUM_PS_THREADS(num_ps_threads) |
461				   R600_NUM_VS_THREADS(num_vs_threads) |
462				   R600_NUM_GS_THREADS(num_gs_threads) |
463				   R600_NUM_ES_THREADS(num_es_threads));
464	sq_stack_resource_mgmt_1 = (R600_NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
465				    R600_NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
466	sq_stack_resource_mgmt_2 = (R600_NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
467				    R600_NUM_ES_STACK_ENTRIES(num_es_stack_entries));
468
469	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV770) {
470		BEGIN_RING(r7xx_default_size + 10);
471		for (i = 0; i < r7xx_default_size; i++)
472			OUT_RING(r7xx_default_state[i]);
473	} else {
474		BEGIN_RING(r6xx_default_size + 10);
475		for (i = 0; i < r6xx_default_size; i++)
476			OUT_RING(r6xx_default_state[i]);
477	}
478	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
479	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
480	/* SQ config */
481	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 6));
482	OUT_RING((R600_SQ_CONFIG - R600_SET_CONFIG_REG_OFFSET) >> 2);
483	OUT_RING(sq_config);
484	OUT_RING(sq_gpr_resource_mgmt_1);
485	OUT_RING(sq_gpr_resource_mgmt_2);
486	OUT_RING(sq_thread_resource_mgmt);
487	OUT_RING(sq_stack_resource_mgmt_1);
488	OUT_RING(sq_stack_resource_mgmt_2);
489	ADVANCE_RING();
490}
491
/*
 * Convert a small unsigned integer to the bit pattern of the equal-valued
 * IEEE-754 single-precision float, using integer operations only.
 *
 * Only the low 14 bits of @input are used (values 0..16383); any higher
 * bits are silently dropped.  Returns the raw 32-bit float encoding.
 */
static inline uint32_t i2f(uint32_t input)
{
	/* place the 14-bit value so that its bit 13 sits on mantissa bit 23 */
	uint32_t mantissa = (input & 0x3fff) << 10;
	/* biased exponent for a value whose top set bit is bit 13: 127 + 13 */
	uint32_t biased_exp = 140;

	if (mantissa == 0)
		return 0; /* 0 is a special case */

	/* normalise: shift left until the implicit leading one reaches bit 23 */
	while (!(mantissa & 0x800000)) {
		mantissa <<= 1;
		biased_exp--;
	}

	/* the leading one is implicit in the encoding, so mask it off */
	return (biased_exp << 23) | (mantissa & 0x7fffff);
}
516
517
518static inline int r600_nomm_get_vb(struct drm_device *dev)
519{
520	drm_radeon_private_t *dev_priv = dev->dev_private;
521	dev_priv->blit_vb = radeon_freelist_get(dev);
522	if (!dev_priv->blit_vb) {
523		DRM_ERROR("Unable to allocate vertex buffer for blit\n");
524		return -EAGAIN;
525	}
526	return 0;
527}
528
529static inline void r600_nomm_put_vb(struct drm_device *dev)
530{
531	drm_radeon_private_t *dev_priv = dev->dev_private;
532
533	dev_priv->blit_vb->used = 0;
534	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
535}
536
537static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
538{
539	drm_radeon_private_t *dev_priv = dev->dev_private;
540	return (((char *)dev->agp_buffer_map->handle +
541		 dev_priv->blit_vb->offset + dev_priv->blit_vb->used));
542}
543
544int
545r600_prepare_blit_copy(struct drm_device *dev, struct drm_file *file_priv)
546{
547	drm_radeon_private_t *dev_priv = dev->dev_private;
548	int ret;
549	DRM_DEBUG("\n");
550
551	ret = r600_nomm_get_vb(dev);
552	if (ret)
553		return ret;
554
555	dev_priv->blit_vb->file_priv = file_priv;
556
557	set_default_state(dev_priv);
558	set_shaders(dev);
559
560	return 0;
561}
562
563
564void
565r600_done_blit_copy(struct drm_device *dev)
566{
567	drm_radeon_private_t *dev_priv = dev->dev_private;
568	RING_LOCALS;
569	DRM_DEBUG("\n");
570
571	BEGIN_RING(5);
572	OUT_RING(CP_PACKET3(R600_IT_EVENT_WRITE, 0));
573	OUT_RING(R600_CACHE_FLUSH_AND_INV_EVENT);
574	/* wait for 3D idle clean */
575	OUT_RING(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1));
576	OUT_RING((R600_WAIT_UNTIL - R600_SET_CONFIG_REG_OFFSET) >> 2);
577	OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_3D_IDLECLEAN);
578
579	ADVANCE_RING();
580	COMMIT_RING();
581
582	r600_nomm_put_vb(dev);
583}
584
585void
586r600_blit_copy(struct drm_device *dev,
587	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
588	       int size_bytes)
589{
590	drm_radeon_private_t *dev_priv = dev->dev_private;
591	int max_bytes;
592	u64 vb_addr;
593	u32 *vb;
594
595	vb = r600_nomm_get_vb_ptr(dev);
596
597	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
598		max_bytes = 8192;
599
600		while (size_bytes) {
601			int cur_size = size_bytes;
602			int src_x = src_gpu_addr & 255;
603			int dst_x = dst_gpu_addr & 255;
604			int h = 1;
605			src_gpu_addr = src_gpu_addr & ~255;
606			dst_gpu_addr = dst_gpu_addr & ~255;
607
608			if (!src_x && !dst_x) {
609				h = (cur_size / max_bytes);
610				if (h > 8192)
611					h = 8192;
612				if (h == 0)
613					h = 1;
614				else
615					cur_size = max_bytes;
616			} else {
617				if (cur_size > max_bytes)
618					cur_size = max_bytes;
619				if (cur_size > (max_bytes - dst_x))
620					cur_size = (max_bytes - dst_x);
621				if (cur_size > (max_bytes - src_x))
622					cur_size = (max_bytes - src_x);
623			}
624
625			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
626
627				r600_nomm_put_vb(dev);
628				r600_nomm_get_vb(dev);
629				if (!dev_priv->blit_vb)
630					return;
631				set_shaders(dev);
632				vb = r600_nomm_get_vb_ptr(dev);
633			}
634
635			vb[0] = i2f(dst_x);
636			vb[1] = 0;
637			vb[2] = i2f(src_x);
638			vb[3] = 0;
639
640			vb[4] = i2f(dst_x);
641			vb[5] = i2f(h);
642			vb[6] = i2f(src_x);
643			vb[7] = i2f(h);
644
645			vb[8] = i2f(dst_x + cur_size);
646			vb[9] = i2f(h);
647			vb[10] = i2f(src_x + cur_size);
648			vb[11] = i2f(h);
649
650			/* src */
651			set_tex_resource(dev_priv, FMT_8,
652					 src_x + cur_size, h, src_x + cur_size,
653					 src_gpu_addr);
654
655			cp_set_surface_sync(dev_priv,
656					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
657
658			/* dst */
659			set_render_target(dev_priv, COLOR_8,
660					  dst_x + cur_size, h,
661					  dst_gpu_addr);
662
663			/* scissors */
664			set_scissors(dev_priv, dst_x, 0, dst_x + cur_size, h);
665
666			/* Vertex buffer setup */
667			vb_addr = dev_priv->gart_buffers_offset +
668				dev_priv->blit_vb->offset +
669				dev_priv->blit_vb->used;
670			set_vtx_resource(dev_priv, vb_addr);
671
672			/* draw */
673			draw_auto(dev_priv);
674
675			cp_set_surface_sync(dev_priv,
676					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
677					    cur_size * h, dst_gpu_addr);
678
679			vb += 12;
680			dev_priv->blit_vb->used += 12 * 4;
681
682			src_gpu_addr += cur_size * h;
683			dst_gpu_addr += cur_size * h;
684			size_bytes -= cur_size * h;
685		}
686	} else {
687		max_bytes = 8192 * 4;
688
689		while (size_bytes) {
690			int cur_size = size_bytes;
691			int src_x = (src_gpu_addr & 255);
692			int dst_x = (dst_gpu_addr & 255);
693			int h = 1;
694			src_gpu_addr = src_gpu_addr & ~255;
695			dst_gpu_addr = dst_gpu_addr & ~255;
696
697			if (!src_x && !dst_x) {
698				h = (cur_size / max_bytes);
699				if (h > 8192)
700					h = 8192;
701				if (h == 0)
702					h = 1;
703				else
704					cur_size = max_bytes;
705			} else {
706				if (cur_size > max_bytes)
707					cur_size = max_bytes;
708				if (cur_size > (max_bytes - dst_x))
709					cur_size = (max_bytes - dst_x);
710				if (cur_size > (max_bytes - src_x))
711					cur_size = (max_bytes - src_x);
712			}
713
714			if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
715				r600_nomm_put_vb(dev);
716				r600_nomm_get_vb(dev);
717				if (!dev_priv->blit_vb)
718					return;
719
720				set_shaders(dev);
721				vb = r600_nomm_get_vb_ptr(dev);
722			}
723
724			vb[0] = i2f(dst_x / 4);
725			vb[1] = 0;
726			vb[2] = i2f(src_x / 4);
727			vb[3] = 0;
728
729			vb[4] = i2f(dst_x / 4);
730			vb[5] = i2f(h);
731			vb[6] = i2f(src_x / 4);
732			vb[7] = i2f(h);
733
734			vb[8] = i2f((dst_x + cur_size) / 4);
735			vb[9] = i2f(h);
736			vb[10] = i2f((src_x + cur_size) / 4);
737			vb[11] = i2f(h);
738
739			/* src */
740			set_tex_resource(dev_priv, FMT_8_8_8_8,
741					 (src_x + cur_size) / 4,
742					 h, (src_x + cur_size) / 4,
743					 src_gpu_addr);
744
745			cp_set_surface_sync(dev_priv,
746					    R600_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
747
748			/* dst */
749			set_render_target(dev_priv, COLOR_8_8_8_8,
750					  (dst_x + cur_size) / 4, h,
751					  dst_gpu_addr);
752
753			/* scissors */
754			set_scissors(dev_priv, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
755
756			/* Vertex buffer setup */
757			vb_addr = dev_priv->gart_buffers_offset +
758				dev_priv->blit_vb->offset +
759				dev_priv->blit_vb->used;
760			set_vtx_resource(dev_priv, vb_addr);
761
762			/* draw */
763			draw_auto(dev_priv);
764
765			cp_set_surface_sync(dev_priv,
766					    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
767					    cur_size * h, dst_gpu_addr);
768
769			vb += 12;
770			dev_priv->blit_vb->used += 12 * 4;
771
772			src_gpu_addr += cur_size * h;
773			dst_gpu_addr += cur_size * h;
774			size_bytes -= cur_size * h;
775		}
776	}
777}
778
779void
780r600_blit_swap(struct drm_device *dev,
781	       uint64_t src_gpu_addr, uint64_t dst_gpu_addr,
782	       int sx, int sy, int dx, int dy,
783	       int w, int h, int src_pitch, int dst_pitch, int cpp)
784{
785	drm_radeon_private_t *dev_priv = dev->dev_private;
786	int cb_format, tex_format;
787	int sx2, sy2, dx2, dy2;
788	u64 vb_addr;
789	u32 *vb;
790
791	if ((dev_priv->blit_vb->used + 48) > dev_priv->blit_vb->total) {
792
793		r600_nomm_put_vb(dev);
794		r600_nomm_get_vb(dev);
795		if (!dev_priv->blit_vb)
796			return;
797
798		set_shaders(dev);
799	}
800	vb = r600_nomm_get_vb_ptr(dev);
801
802	sx2 = sx + w;
803	sy2 = sy + h;
804	dx2 = dx + w;
805	dy2 = dy + h;
806
807	vb[0] = i2f(dx);
808	vb[1] = i2f(dy);
809	vb[2] = i2f(sx);
810	vb[3] = i2f(sy);
811
812	vb[4] = i2f(dx);
813	vb[5] = i2f(dy2);
814	vb[6] = i2f(sx);
815	vb[7] = i2f(sy2);
816
817	vb[8] = i2f(dx2);
818	vb[9] = i2f(dy2);
819	vb[10] = i2f(sx2);
820	vb[11] = i2f(sy2);
821
822	switch(cpp) {
823	case 4:
824		cb_format = COLOR_8_8_8_8;
825		tex_format = FMT_8_8_8_8;
826		break;
827	case 2:
828		cb_format = COLOR_5_6_5;
829		tex_format = FMT_5_6_5;
830		break;
831	default:
832		cb_format = COLOR_8;
833		tex_format = FMT_8;
834		break;
835	}
836
837	/* src */
838	set_tex_resource(dev_priv, tex_format,
839			 src_pitch / cpp,
840			 sy2, src_pitch / cpp,
841			 src_gpu_addr);
842
843	cp_set_surface_sync(dev_priv,
844			    R600_TC_ACTION_ENA, src_pitch * sy2, src_gpu_addr);
845
846	/* dst */
847	set_render_target(dev_priv, cb_format,
848			  dst_pitch / cpp, dy2,
849			  dst_gpu_addr);
850
851	/* scissors */
852	set_scissors(dev_priv, dx, dy, dx2, dy2);
853
854	/* Vertex buffer setup */
855	vb_addr = dev_priv->gart_buffers_offset +
856		dev_priv->blit_vb->offset +
857		dev_priv->blit_vb->used;
858	set_vtx_resource(dev_priv, vb_addr);
859
860	/* draw */
861	draw_auto(dev_priv);
862
863	cp_set_surface_sync(dev_priv,
864			    R600_CB_ACTION_ENA | R600_CB0_DEST_BASE_ENA,
865			    dst_pitch * dy2, dst_gpu_addr);
866
867	dev_priv->blit_vb->used += 12 * 4;
868}