/*
 * Copyright 2009 Advanced Micro Devices, Inc.
 * Copyright 2009 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "drmP.h"
#include "drm.h"
#include "radeon_drm.h"
#include "radeon.h"

#include "r600d.h"
#include "r600_blit_shaders.h"

#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2

#define FMT_8                 0x1
#define FMT_5_6_5             0x8
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a

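/*
 * Emit the CB_COLOR0_* state for the blit destination.  Height is
 * rounded up to the 8-line granularity the CB expects, and pitch/slice
 * are programmed in the usual 8-pixel and 64-pixel units.  rv6xx parts
 * also need a SURFACE_BASE_UPDATE after the base address changes,
 * which is where the two extra dwords come from.
 */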
/* emits 21 on rv770+, 23 on rv6xx */
static void
set_render_target(struct radeon_device *rdev, int format,
		  int w, int h, u64 gpu_addr)
{
	u32 cb_color_info;
	int pitch, slice;

	h = ALIGN(h, 8);
	if (h < 8)
		h = 8;

	cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
	pitch = (w / 8) - 1;
	slice = ((w * h) / 64) - 1;

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, gpu_addr >> 8);

	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
		radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
		radeon_ring_write(rdev, 2 << 0);
	}

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, (pitch << 0) | (slice << 10));

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, cb_color_info);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);
}

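/*
 * Emit a SURFACE_SYNC packet flushing the caches selected by sync_type
 * over [mc_addr, mc_addr + size).  Base address and size are given to
 * the CP in 256-byte units, hence the round-up shift; a size of ~0
 * requests a full-range sync.
 */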
/* emits 5dw */
static void
cp_set_surface_sync(struct radeon_device *rdev,
		    u32 sync_type, u32 size,
		    u64 mc_addr)
{
	u32 cp_coher_size;

	if (size == 0xffffffff)
		cp_coher_size = 0xffffffff;
	else
		cp_coher_size = ((size + 255) >> 8);

	radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(rdev, sync_type);
	radeon_ring_write(rdev, cp_coher_size);
	radeon_ring_write(rdev, mc_addr >> 8);
	radeon_ring_write(rdev, 10); /* poll interval */
}

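/*
 * Point SQ_PGM_START_VS/PS at the blit shaders copied into the shader
 * BO by r600_blit_init() and set up their GPR budgets and exports.
 * The trailing surface sync (SH_ACTION_ENA) makes sure the shader
 * cache sees the freshly written code before the first draw.
 */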
/* emits 21dw + 1 surface sync = 26dw */
static void
set_shaders(struct radeon_device *rdev)
{
	u64 gpu_addr;
	u32 sq_pgm_resources;

	/* setup shader regs */
	sq_pgm_resources = (1 << 0);

	/* VS */
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, gpu_addr >> 8);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, sq_pgm_resources);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);

	/* PS */
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, gpu_addr >> 8);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, sq_pgm_resources | (1 << 28));

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 2);

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
	radeon_ring_write(rdev, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, 0);

	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}

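/*
 * Emit the fetch constant for the rectangle vertex buffer (three
 * vertices of 16 bytes each, hence the size of 48 and the stride of
 * 16).  RV610/RV620/RS780/RS880/RV710 have no vertex cache and fetch
 * vertex data through the texture cache, so the following sync must
 * target TC instead of VC on those parts.
 */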
/* emits 9 + 1 sync (5) = 14 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
	u32 sq_vtx_constant_word2;

	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
#ifdef __BIG_ENDIAN
	sq_vtx_constant_word2 |= (2 << 30);
#endif

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
	radeon_ring_write(rdev, 0x460);
	radeon_ring_write(rdev, gpu_addr & 0xffffffff);
	radeon_ring_write(rdev, 48 - 1);
	radeon_ring_write(rdev, sq_vtx_constant_word2);
	radeon_ring_write(rdev, 1 << 0);
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);

	if ((rdev->family == CHIP_RV610) ||
	    (rdev->family == CHIP_RV620) ||
	    (rdev->family == CHIP_RS780) ||
	    (rdev->family == CHIP_RS880) ||
	    (rdev->family == CHIP_RV710))
		cp_set_surface_sync(rdev,
				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
	else
		cp_set_surface_sync(rdev,
				    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
}

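/*
 * Emit the texture resource describing the blit source.  Base and mip
 * base both point at gpu_addr since only mip level 0 is ever sampled,
 * and word4 appears to program the identity component swizzle.
 */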
/* emits 9 */
static void
set_tex_resource(struct radeon_device *rdev,
		 int format, int w, int h, int pitch,
		 u64 gpu_addr)
{
	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;

	if (h < 1)
		h = 1;

	sq_tex_resource_word0 = (1 << 0) | (1 << 3);
	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
				  ((w - 1) << 19));

	sq_tex_resource_word1 = (format << 26);
	sq_tex_resource_word1 |= ((h - 1) << 0);

	sq_tex_resource_word4 = ((1 << 14) |
				 (0 << 16) |
				 (1 << 19) |
				 (2 << 22) |
				 (3 << 25));

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, sq_tex_resource_word0);
	radeon_ring_write(rdev, sq_tex_resource_word1);
	radeon_ring_write(rdev, gpu_addr >> 8);
	radeon_ring_write(rdev, gpu_addr >> 8);
	radeon_ring_write(rdev, sq_tex_resource_word4);
	radeon_ring_write(rdev, 0);
	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_TEXTURE << 30);
}

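/*
 * Clamp rendering to the destination rectangle.  All three scissors
 * (screen, generic, window) are programmed; bit 31 in the
 * generic/window TL dwords is presumably WINDOW_OFFSET_DISABLE, so the
 * coordinates are taken as absolute.
 */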
/* emits 12 */
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
	     int x2, int y2)
{
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
	radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
}

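/*
 * Emit the actual draw: a rectlist primitive with auto-generated
 * indices, three vertices per rectangle.  The vertex data itself was
 * written into the VB IB by r600_kms_blit_copy().
 */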
/* emits 10 */
static void
draw_auto(struct radeon_device *rdev)
{
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, DI_PT_RECTLIST);

	radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
	radeon_ring_write(rdev,
#ifdef __BIG_ENDIAN
			  (2 << 2) |
#endif
			  DI_INDEX_SIZE_16_BIT);

	radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
	radeon_ring_write(rdev, 1);

	radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
	radeon_ring_write(rdev, 3);
	radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
}

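/*
 * Emit an IB pointing at the family-specific default register state,
 * then carve up the SQ: GPRs, thread slots and stack entries are
 * divided between the shader stages per ASIC.  Only VS and PS do real
 * work for a blit, so GS/ES get little or nothing.
 */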
/* emits 14 */
static void
set_default_state(struct radeon_device *rdev)
{
	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
	u64 gpu_addr;
	int dwords;

	switch (rdev->family) {
	case CHIP_R600:
		num_ps_gprs = 192;
		num_vs_gprs = 56;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 136;
		num_vs_threads = 48;
		num_gs_threads = 4;
		num_es_threads = 4;
		num_ps_stack_entries = 128;
		num_vs_stack_entries = 128;
		num_gs_stack_entries = 0;
		num_es_stack_entries = 0;
		break;
	case CHIP_RV630:
	case CHIP_RV635:
		num_ps_gprs = 84;
		num_vs_gprs = 36;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 144;
		num_vs_threads = 40;
		num_gs_threads = 4;
		num_es_threads = 4;
		num_ps_stack_entries = 40;
		num_vs_stack_entries = 40;
		num_gs_stack_entries = 32;
		num_es_stack_entries = 16;
		break;
	case CHIP_RV610:
	case CHIP_RV620:
	case CHIP_RS780:
	case CHIP_RS880:
	default:
		num_ps_gprs = 84;
		num_vs_gprs = 36;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 136;
		num_vs_threads = 48;
		num_gs_threads = 4;
		num_es_threads = 4;
		num_ps_stack_entries = 40;
		num_vs_stack_entries = 40;
		num_gs_stack_entries = 32;
		num_es_stack_entries = 16;
		break;
	case CHIP_RV670:
		num_ps_gprs = 144;
		num_vs_gprs = 40;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 136;
		num_vs_threads = 48;
		num_gs_threads = 4;
		num_es_threads = 4;
		num_ps_stack_entries = 40;
		num_vs_stack_entries = 40;
		num_gs_stack_entries = 32;
		num_es_stack_entries = 16;
		break;
	case CHIP_RV770:
		num_ps_gprs = 192;
		num_vs_gprs = 56;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 188;
		num_vs_threads = 60;
		num_gs_threads = 0;
		num_es_threads = 0;
		num_ps_stack_entries = 256;
		num_vs_stack_entries = 256;
		num_gs_stack_entries = 0;
		num_es_stack_entries = 0;
		break;
	case CHIP_RV730:
	case CHIP_RV740:
		num_ps_gprs = 84;
		num_vs_gprs = 36;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 188;
		num_vs_threads = 60;
		num_gs_threads = 0;
		num_es_threads = 0;
		num_ps_stack_entries = 128;
		num_vs_stack_entries = 128;
		num_gs_stack_entries = 0;
		num_es_stack_entries = 0;
		break;
	case CHIP_RV710:
		num_ps_gprs = 192;
		num_vs_gprs = 56;
		num_temp_gprs = 4;
		num_gs_gprs = 0;
		num_es_gprs = 0;
		num_ps_threads = 144;
		num_vs_threads = 48;
		num_gs_threads = 0;
		num_es_threads = 0;
		num_ps_stack_entries = 128;
		num_vs_stack_entries = 128;
		num_gs_stack_entries = 0;
		num_es_stack_entries = 0;
		break;
	}

	if ((rdev->family == CHIP_RV610) ||
	    (rdev->family == CHIP_RV620) ||
	    (rdev->family == CHIP_RS780) ||
	    (rdev->family == CHIP_RS880) ||
	    (rdev->family == CHIP_RV710))
		sq_config = 0;
	else
		sq_config = VC_ENABLE;

	sq_config |= (DX9_CONSTS |
		      ALU_INST_PREFER_VECTOR |
		      PS_PRIO(0) |
		      VS_PRIO(1) |
		      GS_PRIO(2) |
		      ES_PRIO(3));

	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
				  NUM_VS_GPRS(num_vs_gprs) |
				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
				  NUM_ES_GPRS(num_es_gprs));
	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
				   NUM_VS_THREADS(num_vs_threads) |
				   NUM_GS_THREADS(num_gs_threads) |
				   NUM_ES_THREADS(num_es_threads));
	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));

	/* emit an IB pointing at default state */
	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
	radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	radeon_ring_write(rdev,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
	radeon_ring_write(rdev, dwords);

	/* SQ config */
	radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 6));
	radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
	radeon_ring_write(rdev, sq_config);
	radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
	radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
	radeon_ring_write(rdev, sq_thread_resource_mgmt);
	radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
}

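/*
 * Convert an unsigned int to its IEEE 754 single-precision bit pattern
 * for the untyped vertex data.  Only values below 2^14 are handled,
 * plenty for 8192-texel blit rectangles: the input is placed below the
 * implicit-one bit with a starting exponent of 140 (127 + 13) and then
 * normalised by shifting.  E.g. i2f(1) shifts the fraction up 13
 * places, leaving exponent 127 and a zero mantissa, i.e.
 * 0x3f800000 == 1.0f.
 */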
static inline uint32_t i2f(uint32_t input)
{
	u32 result, i, exponent, fraction;

	if ((input & 0x3fff) == 0)
		result = 0; /* 0 is a special case */
	else {
		exponent = 140; /* exponent biased by 127 */
		fraction = (input & 0x3fff) << 10; /* cheat and only
						      handle numbers below 2^14 */
		for (i = 0; i < 14; i++) {
			if (fraction & 0x800000)
				break;
			else {
				fraction = fraction << 1; /* keep
							     shifting left until top bit = 1 */
				exponent = exponent - 1;
			}
		}
		result = exponent << 23 | (fraction & 0x7fffff); /* mask
								    off top bit; assumed 1 */
	}
	return result;
}

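/*
 * Lay out and fill the blit shader BO: the default state stream
 * (padded with PACKET2 NOPs to a 16-dword multiple so it can be
 * fetched as an indirect buffer), then the vertex and pixel shaders,
 * each section aligned to 256 bytes to suit the >> 8 address
 * programming.  On re-init (e.g. resume) the BO already exists and is
 * only re-pinned.
 */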
int r600_blit_init(struct radeon_device *rdev)
{
	u32 obj_size;
	int i, r, dwords;
	void *ptr;
	u32 packet2s[16];
	int num_packet2s = 0;

	/* pin copy shader into vram if already initialized */
	if (rdev->r600_blit.shader_obj)
		goto done;

	mutex_init(&rdev->r600_blit.mutex);
	rdev->r600_blit.state_offset = 0;

	if (rdev->family >= CHIP_RV770)
		rdev->r600_blit.state_len = r7xx_default_size;
	else
		rdev->r600_blit.state_len = r6xx_default_size;

	dwords = rdev->r600_blit.state_len;
	while (dwords & 0xf) {
		packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
		dwords++;
	}

	obj_size = dwords * 4;
	obj_size = ALIGN(obj_size, 256);

	rdev->r600_blit.vs_offset = obj_size;
	obj_size += r6xx_vs_size * 4;
	obj_size = ALIGN(obj_size, 256);

	rdev->r600_blit.ps_offset = obj_size;
	obj_size += r6xx_ps_size * 4;
	obj_size = ALIGN(obj_size, 256);

	r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
			     &rdev->r600_blit.shader_obj);
	if (r) {
		DRM_ERROR("r600 failed to allocate shader\n");
		return r;
	}

	DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n",
		  obj_size,
		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
	if (r) {
		DRM_ERROR("failed to map blit object %d\n", r);
		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
		return r;
	}
	if (rdev->family >= CHIP_RV770)
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    r7xx_default_state, rdev->r600_blit.state_len * 4);
	else
		memcpy_toio(ptr + rdev->r600_blit.state_offset,
			    r6xx_default_state, rdev->r600_blit.state_len * 4);
	if (num_packet2s)
		memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
			    packet2s, num_packet2s * 4);
	for (i = 0; i < r6xx_vs_size; i++)
		*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]);
	for (i = 0; i < r6xx_ps_size; i++)
		*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]);
	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
	radeon_bo_unreserve(rdev->r600_blit.shader_obj);

done:
	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
			  &rdev->r600_blit.shader_gpu_addr);
	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
	if (r) {
		dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
		return r;
	}
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
	return 0;
}

void r600_blit_fini(struct radeon_device *rdev)
{
	int r;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
	if (rdev->r600_blit.shader_obj == NULL)
		return;
	/* If we can't reserve the bo, unref should be enough to destroy
	 * it when it becomes idle.
	 */
	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
	if (!r) {
		radeon_bo_unpin(rdev->r600_blit.shader_obj);
		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
	}
	radeon_bo_unref(&rdev->r600_blit.shader_obj);
}

static int r600_vb_ib_get(struct radeon_device *rdev)
{
	int r;

	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
	if (r) {
		DRM_ERROR("failed to get IB for vertex buffer\n");
		return r;
	}

	rdev->r600_blit.vb_total = 64 * 1024;
	rdev->r600_blit.vb_used = 0;
	return 0;
}

static void r600_vb_ib_put(struct radeon_device *rdev)
{
	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
}

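/*
 * Size the ring allocation for a whole copy up front.  A rectangle
 * line covers at most 8192 bytes (8 bpp path) or 32768 bytes (32 bpp
 * path) and a rectangle at most 8192 lines, so the loop count here is
 * a worst-case bound rather than an exact figure.
 */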
int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
{
	int r;
	int ring_size, line_size;
	int max_size;
	/* 76 ring dwords per rectangle loop, see r600_kms_blit_copy() */
	int dwords_per_loop = 76, num_loops;

	r = r600_vb_ib_get(rdev);
	if (r)
		return r;

	/* set_render_target emits 2 extra dwords on rv6xx */
	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
		dwords_per_loop += 2;

	/* 8 bpp vs 32 bpp for xfer unit */
	if (size_bytes & 3)
		line_size = 8192;
	else
		line_size = 8192 * 4;

	max_size = 8192 * line_size;

	/* major loops cover the max size transfer */
	num_loops = ((size_bytes + max_size) / max_size);
	/* minor loops cover the extra non aligned bits */
	num_loops += ((size_bytes % line_size) ? 1 : 0);
	/* calculate number of loops correctly */
	ring_size = num_loops * dwords_per_loop;
	/* set default + shaders */
	ring_size += 40; /* shaders + def state */
	ring_size += 10; /* fence emit for VB IB */
	ring_size += 5; /* done copy */
	ring_size += 10; /* fence emit for done copy */
	r = radeon_ring_lock(rdev, ring_size);
	if (r)
		return r;

	set_default_state(rdev); /* 14 */
	set_shaders(rdev); /* 26 */
	return 0;
}

void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
{
	int r;

	if (rdev->r600_blit.vb_ib)
		r600_vb_ib_put(rdev);

	if (fence)
		r = radeon_fence_emit(rdev, fence);

	radeon_ring_unlock_commit(rdev);
}

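/*
 * Copy by drawing textured rectangles.  If the size or either address
 * is not dword aligned, the 8 bpp path moves one byte per texel;
 * otherwise the 32 bpp path moves four.  Each rectangle consumes 48
 * bytes of the VB IB: three corners, each a float x/y position plus
 * x/y texture coordinate produced by i2f().
 */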
void r600_kms_blit_copy(struct radeon_device *rdev,
			u64 src_gpu_addr, u64 dst_gpu_addr,
			int size_bytes)
{
	int max_bytes;
	u64 vb_gpu_addr;
	u32 *vb;

	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
		  size_bytes, rdev->r600_blit.vb_used);
	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
		max_bytes = 8192;

		while (size_bytes) {
			int cur_size = size_bytes;
			int src_x = src_gpu_addr & 255;
			int dst_x = dst_gpu_addr & 255;
			int h = 1;
			src_gpu_addr = src_gpu_addr & ~255ULL;
			dst_gpu_addr = dst_gpu_addr & ~255ULL;

			if (!src_x && !dst_x) {
				h = (cur_size / max_bytes);
				if (h > 8192)
					h = 8192;
				if (h == 0)
					h = 1;
				else
					cur_size = max_bytes;
			} else {
				if (cur_size > max_bytes)
					cur_size = max_bytes;
				if (cur_size > (max_bytes - dst_x))
					cur_size = (max_bytes - dst_x);
				if (cur_size > (max_bytes - src_x))
					cur_size = (max_bytes - src_x);
			}

			WARN_ON((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total);

			vb[0] = i2f(dst_x);
			vb[1] = 0;
			vb[2] = i2f(src_x);
			vb[3] = 0;

			vb[4] = i2f(dst_x);
			vb[5] = i2f(h);
			vb[6] = i2f(src_x);
			vb[7] = i2f(h);

			vb[8] = i2f(dst_x + cur_size);
			vb[9] = i2f(h);
			vb[10] = i2f(src_x + cur_size);
			vb[11] = i2f(h);

			/* src 9 */
			set_tex_resource(rdev, FMT_8,
					 src_x + cur_size, h, src_x + cur_size,
					 src_gpu_addr);

			/* 5 */
			cp_set_surface_sync(rdev,
					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

			/* dst 23 */
			set_render_target(rdev, COLOR_8,
					  dst_x + cur_size, h,
					  dst_gpu_addr);

			/* scissors 12 */
			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);

			/* 14 */
			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
			set_vtx_resource(rdev, vb_gpu_addr);

			/* draw 10 */
			draw_auto(rdev);

			/* 5 */
			cp_set_surface_sync(rdev,
					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
					    cur_size * h, dst_gpu_addr);

			vb += 12;
			rdev->r600_blit.vb_used += 12 * 4;

			src_gpu_addr += cur_size * h;
			dst_gpu_addr += cur_size * h;
			size_bytes -= cur_size * h;
		}
	} else {
		max_bytes = 8192 * 4;

		while (size_bytes) {
			int cur_size = size_bytes;
			int src_x = (src_gpu_addr & 255);
			int dst_x = (dst_gpu_addr & 255);
			int h = 1;
			src_gpu_addr = src_gpu_addr & ~255ULL;
			dst_gpu_addr = dst_gpu_addr & ~255ULL;

			if (!src_x && !dst_x) {
				h = (cur_size / max_bytes);
				if (h > 8192)
					h = 8192;
				if (h == 0)
					h = 1;
				else
					cur_size = max_bytes;
			} else {
				if (cur_size > max_bytes)
					cur_size = max_bytes;
				if (cur_size > (max_bytes - dst_x))
					cur_size = (max_bytes - dst_x);
				if (cur_size > (max_bytes - src_x))
					cur_size = (max_bytes - src_x);
			}

			WARN_ON((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total);

			vb[0] = i2f(dst_x / 4);
			vb[1] = 0;
			vb[2] = i2f(src_x / 4);
			vb[3] = 0;

			vb[4] = i2f(dst_x / 4);
			vb[5] = i2f(h);
			vb[6] = i2f(src_x / 4);
			vb[7] = i2f(h);

			vb[8] = i2f((dst_x + cur_size) / 4);
			vb[9] = i2f(h);
			vb[10] = i2f((src_x + cur_size) / 4);
			vb[11] = i2f(h);

			/* src 9 */
			set_tex_resource(rdev, FMT_8_8_8_8,
					 (src_x + cur_size) / 4,
					 h, (src_x + cur_size) / 4,
					 src_gpu_addr);
			/* 5 */
			cp_set_surface_sync(rdev,
					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

			/* dst 23 */
			set_render_target(rdev, COLOR_8_8_8_8,
					  (dst_x + cur_size) / 4, h,
					  dst_gpu_addr);

			/* scissors 12 */
			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size) / 4, h);

			/* Vertex buffer setup 14 */
			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
			set_vtx_resource(rdev, vb_gpu_addr);

			/* draw 10 */
			draw_auto(rdev);

			/* 5 */
			cp_set_surface_sync(rdev,
					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
					    cur_size * h, dst_gpu_addr);

			/* 78 ring dwords per loop */
			vb += 12;
			rdev->r600_blit.vb_used += 12 * 4;

			src_gpu_addr += cur_size * h;
			dst_gpu_addr += cur_size * h;
			size_bytes -= cur_size * h;
		}
	}
}