/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
24
25#include <linux/firmware.h>
26#include <linux/module.h>
27#include <linux/pci.h>
28#include <linux/slab.h>
29
30#include <drm/drm_vblank.h>
31
32#include "atom.h"
33#include "evergreen.h"
34#include "cik_blit_shaders.h"
35#include "cik.h"
36#include "cikd.h"
37#include "clearstate_ci.h"
38#include "r600.h"
39#include "radeon.h"
40#include "radeon_asic.h"
41#include "radeon_audio.h"
42#include "radeon_ucode.h"
43#include "si.h"
44#include "vce.h"
45
/* Default SH_MEM_CONFIG value for gfx: only the unaligned alignment mode field is set. */
#define SH_MEM_CONFIG_GFX_DEFAULT	ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
48
49MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
50MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
51MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
53MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
54MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
55MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
58
59MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
60MODULE_FIRMWARE("radeon/bonaire_me.bin");
61MODULE_FIRMWARE("radeon/bonaire_ce.bin");
62MODULE_FIRMWARE("radeon/bonaire_mec.bin");
63MODULE_FIRMWARE("radeon/bonaire_mc.bin");
64MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
65MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
66MODULE_FIRMWARE("radeon/bonaire_smc.bin");
67MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
68
69MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
70MODULE_FIRMWARE("radeon/HAWAII_me.bin");
71MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
72MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
73MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
74MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
75MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
76MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
77MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
78
79MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
80MODULE_FIRMWARE("radeon/hawaii_me.bin");
81MODULE_FIRMWARE("radeon/hawaii_ce.bin");
82MODULE_FIRMWARE("radeon/hawaii_mec.bin");
83MODULE_FIRMWARE("radeon/hawaii_mc.bin");
84MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
85MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
86MODULE_FIRMWARE("radeon/hawaii_smc.bin");
87MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
88
89MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
90MODULE_FIRMWARE("radeon/KAVERI_me.bin");
91MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
92MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
93MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
94MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
95
96MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
97MODULE_FIRMWARE("radeon/kaveri_me.bin");
98MODULE_FIRMWARE("radeon/kaveri_ce.bin");
99MODULE_FIRMWARE("radeon/kaveri_mec.bin");
100MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
101MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
102MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
103
104MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
105MODULE_FIRMWARE("radeon/KABINI_me.bin");
106MODULE_FIRMWARE("radeon/KABINI_ce.bin");
107MODULE_FIRMWARE("radeon/KABINI_mec.bin");
108MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
109MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
110
111MODULE_FIRMWARE("radeon/kabini_pfp.bin");
112MODULE_FIRMWARE("radeon/kabini_me.bin");
113MODULE_FIRMWARE("radeon/kabini_ce.bin");
114MODULE_FIRMWARE("radeon/kabini_mec.bin");
115MODULE_FIRMWARE("radeon/kabini_rlc.bin");
116MODULE_FIRMWARE("radeon/kabini_sdma.bin");
117
118MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
119MODULE_FIRMWARE("radeon/MULLINS_me.bin");
120MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
121MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
122MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
123MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
124
125MODULE_FIRMWARE("radeon/mullins_pfp.bin");
126MODULE_FIRMWARE("radeon/mullins_me.bin");
127MODULE_FIRMWARE("radeon/mullins_ce.bin");
128MODULE_FIRMWARE("radeon/mullins_mec.bin");
129MODULE_FIRMWARE("radeon/mullins_rlc.bin");
130MODULE_FIRMWARE("radeon/mullins_sdma.bin");
131
132static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
133static void cik_rlc_stop(struct radeon_device *rdev);
134static void cik_pcie_gen3_enable(struct radeon_device *rdev);
135static void cik_program_aspm(struct radeon_device *rdev);
136static void cik_init_pg(struct radeon_device *rdev);
137static void cik_init_cg(struct radeon_device *rdev);
138static void cik_fini_pg(struct radeon_device *rdev);
139static void cik_fini_cg(struct radeon_device *rdev);
140static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
141 bool enable);
142
143/**
144 * cik_get_allowed_info_register - fetch the register for the info ioctl
145 *
146 * @rdev: radeon_device pointer
147 * @reg: register offset in bytes
148 * @val: register value
149 *
150 * Returns 0 for success or -EINVAL for an invalid register
151 *
152 */
153int cik_get_allowed_info_register(struct radeon_device *rdev,
154 u32 reg, u32 *val)
155{
156 switch (reg) {
157 case GRBM_STATUS:
158 case GRBM_STATUS2:
159 case GRBM_STATUS_SE0:
160 case GRBM_STATUS_SE1:
161 case GRBM_STATUS_SE2:
162 case GRBM_STATUS_SE3:
163 case SRBM_STATUS:
164 case SRBM_STATUS2:
165 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
166 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
167 case UVD_STATUS:
168 /* TODO VCE */
169 *val = RREG32(reg);
170 return 0;
171 default:
172 return -EINVAL;
173 }
174}
175
176/*
177 * Indirect registers accessor
178 */
179u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
180{
181 unsigned long flags;
182 u32 r;
183
184 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
185 WREG32(CIK_DIDT_IND_INDEX, (reg));
186 r = RREG32(CIK_DIDT_IND_DATA);
187 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
188 return r;
189}
190
191void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
192{
193 unsigned long flags;
194
195 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
196 WREG32(CIK_DIDT_IND_INDEX, (reg));
197 WREG32(CIK_DIDT_IND_DATA, (v));
198 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
199}
200
201/* get temperature in millidegrees */
202int ci_get_temp(struct radeon_device *rdev)
203{
204 u32 temp;
205 int actual_temp = 0;
206
207 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
208 CTF_TEMP_SHIFT;
209
210 if (temp & 0x200)
211 actual_temp = 255;
212 else
213 actual_temp = temp & 0x1ff;
214
215 return actual_temp * 1000;
216}
217
218/* get temperature in millidegrees */
219int kv_get_temp(struct radeon_device *rdev)
220{
221 u32 temp;
222 int actual_temp = 0;
223
224 temp = RREG32_SMC(0xC0300E0C);
225
226 if (temp)
227 actual_temp = (temp / 8) - 49;
228 else
229 actual_temp = 0;
230
231 return actual_temp * 1000;
232}
233
234/*
235 * Indirect registers accessor
236 */
237u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
238{
239 unsigned long flags;
240 u32 r;
241
242 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
243 WREG32(PCIE_INDEX, reg);
244 (void)RREG32(PCIE_INDEX);
245 r = RREG32(PCIE_DATA);
246 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
247 return r;
248}
249
250void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
251{
252 unsigned long flags;
253
254 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
255 WREG32(PCIE_INDEX, reg);
256 (void)RREG32(PCIE_INDEX);
257 WREG32(PCIE_DATA, v);
258 (void)RREG32(PCIE_DATA);
259 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260}
261
262static const u32 spectre_rlc_save_restore_register_list[] =
263{
264 (0x0e00 << 16) | (0xc12c >> 2),
265 0x00000000,
266 (0x0e00 << 16) | (0xc140 >> 2),
267 0x00000000,
268 (0x0e00 << 16) | (0xc150 >> 2),
269 0x00000000,
270 (0x0e00 << 16) | (0xc15c >> 2),
271 0x00000000,
272 (0x0e00 << 16) | (0xc168 >> 2),
273 0x00000000,
274 (0x0e00 << 16) | (0xc170 >> 2),
275 0x00000000,
276 (0x0e00 << 16) | (0xc178 >> 2),
277 0x00000000,
278 (0x0e00 << 16) | (0xc204 >> 2),
279 0x00000000,
280 (0x0e00 << 16) | (0xc2b4 >> 2),
281 0x00000000,
282 (0x0e00 << 16) | (0xc2b8 >> 2),
283 0x00000000,
284 (0x0e00 << 16) | (0xc2bc >> 2),
285 0x00000000,
286 (0x0e00 << 16) | (0xc2c0 >> 2),
287 0x00000000,
288 (0x0e00 << 16) | (0x8228 >> 2),
289 0x00000000,
290 (0x0e00 << 16) | (0x829c >> 2),
291 0x00000000,
292 (0x0e00 << 16) | (0x869c >> 2),
293 0x00000000,
294 (0x0600 << 16) | (0x98f4 >> 2),
295 0x00000000,
296 (0x0e00 << 16) | (0x98f8 >> 2),
297 0x00000000,
298 (0x0e00 << 16) | (0x9900 >> 2),
299 0x00000000,
300 (0x0e00 << 16) | (0xc260 >> 2),
301 0x00000000,
302 (0x0e00 << 16) | (0x90e8 >> 2),
303 0x00000000,
304 (0x0e00 << 16) | (0x3c000 >> 2),
305 0x00000000,
306 (0x0e00 << 16) | (0x3c00c >> 2),
307 0x00000000,
308 (0x0e00 << 16) | (0x8c1c >> 2),
309 0x00000000,
310 (0x0e00 << 16) | (0x9700 >> 2),
311 0x00000000,
312 (0x0e00 << 16) | (0xcd20 >> 2),
313 0x00000000,
314 (0x4e00 << 16) | (0xcd20 >> 2),
315 0x00000000,
316 (0x5e00 << 16) | (0xcd20 >> 2),
317 0x00000000,
318 (0x6e00 << 16) | (0xcd20 >> 2),
319 0x00000000,
320 (0x7e00 << 16) | (0xcd20 >> 2),
321 0x00000000,
322 (0x8e00 << 16) | (0xcd20 >> 2),
323 0x00000000,
324 (0x9e00 << 16) | (0xcd20 >> 2),
325 0x00000000,
326 (0xae00 << 16) | (0xcd20 >> 2),
327 0x00000000,
328 (0xbe00 << 16) | (0xcd20 >> 2),
329 0x00000000,
330 (0x0e00 << 16) | (0x89bc >> 2),
331 0x00000000,
332 (0x0e00 << 16) | (0x8900 >> 2),
333 0x00000000,
334 0x3,
335 (0x0e00 << 16) | (0xc130 >> 2),
336 0x00000000,
337 (0x0e00 << 16) | (0xc134 >> 2),
338 0x00000000,
339 (0x0e00 << 16) | (0xc1fc >> 2),
340 0x00000000,
341 (0x0e00 << 16) | (0xc208 >> 2),
342 0x00000000,
343 (0x0e00 << 16) | (0xc264 >> 2),
344 0x00000000,
345 (0x0e00 << 16) | (0xc268 >> 2),
346 0x00000000,
347 (0x0e00 << 16) | (0xc26c >> 2),
348 0x00000000,
349 (0x0e00 << 16) | (0xc270 >> 2),
350 0x00000000,
351 (0x0e00 << 16) | (0xc274 >> 2),
352 0x00000000,
353 (0x0e00 << 16) | (0xc278 >> 2),
354 0x00000000,
355 (0x0e00 << 16) | (0xc27c >> 2),
356 0x00000000,
357 (0x0e00 << 16) | (0xc280 >> 2),
358 0x00000000,
359 (0x0e00 << 16) | (0xc284 >> 2),
360 0x00000000,
361 (0x0e00 << 16) | (0xc288 >> 2),
362 0x00000000,
363 (0x0e00 << 16) | (0xc28c >> 2),
364 0x00000000,
365 (0x0e00 << 16) | (0xc290 >> 2),
366 0x00000000,
367 (0x0e00 << 16) | (0xc294 >> 2),
368 0x00000000,
369 (0x0e00 << 16) | (0xc298 >> 2),
370 0x00000000,
371 (0x0e00 << 16) | (0xc29c >> 2),
372 0x00000000,
373 (0x0e00 << 16) | (0xc2a0 >> 2),
374 0x00000000,
375 (0x0e00 << 16) | (0xc2a4 >> 2),
376 0x00000000,
377 (0x0e00 << 16) | (0xc2a8 >> 2),
378 0x00000000,
379 (0x0e00 << 16) | (0xc2ac >> 2),
380 0x00000000,
381 (0x0e00 << 16) | (0xc2b0 >> 2),
382 0x00000000,
383 (0x0e00 << 16) | (0x301d0 >> 2),
384 0x00000000,
385 (0x0e00 << 16) | (0x30238 >> 2),
386 0x00000000,
387 (0x0e00 << 16) | (0x30250 >> 2),
388 0x00000000,
389 (0x0e00 << 16) | (0x30254 >> 2),
390 0x00000000,
391 (0x0e00 << 16) | (0x30258 >> 2),
392 0x00000000,
393 (0x0e00 << 16) | (0x3025c >> 2),
394 0x00000000,
395 (0x4e00 << 16) | (0xc900 >> 2),
396 0x00000000,
397 (0x5e00 << 16) | (0xc900 >> 2),
398 0x00000000,
399 (0x6e00 << 16) | (0xc900 >> 2),
400 0x00000000,
401 (0x7e00 << 16) | (0xc900 >> 2),
402 0x00000000,
403 (0x8e00 << 16) | (0xc900 >> 2),
404 0x00000000,
405 (0x9e00 << 16) | (0xc900 >> 2),
406 0x00000000,
407 (0xae00 << 16) | (0xc900 >> 2),
408 0x00000000,
409 (0xbe00 << 16) | (0xc900 >> 2),
410 0x00000000,
411 (0x4e00 << 16) | (0xc904 >> 2),
412 0x00000000,
413 (0x5e00 << 16) | (0xc904 >> 2),
414 0x00000000,
415 (0x6e00 << 16) | (0xc904 >> 2),
416 0x00000000,
417 (0x7e00 << 16) | (0xc904 >> 2),
418 0x00000000,
419 (0x8e00 << 16) | (0xc904 >> 2),
420 0x00000000,
421 (0x9e00 << 16) | (0xc904 >> 2),
422 0x00000000,
423 (0xae00 << 16) | (0xc904 >> 2),
424 0x00000000,
425 (0xbe00 << 16) | (0xc904 >> 2),
426 0x00000000,
427 (0x4e00 << 16) | (0xc908 >> 2),
428 0x00000000,
429 (0x5e00 << 16) | (0xc908 >> 2),
430 0x00000000,
431 (0x6e00 << 16) | (0xc908 >> 2),
432 0x00000000,
433 (0x7e00 << 16) | (0xc908 >> 2),
434 0x00000000,
435 (0x8e00 << 16) | (0xc908 >> 2),
436 0x00000000,
437 (0x9e00 << 16) | (0xc908 >> 2),
438 0x00000000,
439 (0xae00 << 16) | (0xc908 >> 2),
440 0x00000000,
441 (0xbe00 << 16) | (0xc908 >> 2),
442 0x00000000,
443 (0x4e00 << 16) | (0xc90c >> 2),
444 0x00000000,
445 (0x5e00 << 16) | (0xc90c >> 2),
446 0x00000000,
447 (0x6e00 << 16) | (0xc90c >> 2),
448 0x00000000,
449 (0x7e00 << 16) | (0xc90c >> 2),
450 0x00000000,
451 (0x8e00 << 16) | (0xc90c >> 2),
452 0x00000000,
453 (0x9e00 << 16) | (0xc90c >> 2),
454 0x00000000,
455 (0xae00 << 16) | (0xc90c >> 2),
456 0x00000000,
457 (0xbe00 << 16) | (0xc90c >> 2),
458 0x00000000,
459 (0x4e00 << 16) | (0xc910 >> 2),
460 0x00000000,
461 (0x5e00 << 16) | (0xc910 >> 2),
462 0x00000000,
463 (0x6e00 << 16) | (0xc910 >> 2),
464 0x00000000,
465 (0x7e00 << 16) | (0xc910 >> 2),
466 0x00000000,
467 (0x8e00 << 16) | (0xc910 >> 2),
468 0x00000000,
469 (0x9e00 << 16) | (0xc910 >> 2),
470 0x00000000,
471 (0xae00 << 16) | (0xc910 >> 2),
472 0x00000000,
473 (0xbe00 << 16) | (0xc910 >> 2),
474 0x00000000,
475 (0x0e00 << 16) | (0xc99c >> 2),
476 0x00000000,
477 (0x0e00 << 16) | (0x9834 >> 2),
478 0x00000000,
479 (0x0000 << 16) | (0x30f00 >> 2),
480 0x00000000,
481 (0x0001 << 16) | (0x30f00 >> 2),
482 0x00000000,
483 (0x0000 << 16) | (0x30f04 >> 2),
484 0x00000000,
485 (0x0001 << 16) | (0x30f04 >> 2),
486 0x00000000,
487 (0x0000 << 16) | (0x30f08 >> 2),
488 0x00000000,
489 (0x0001 << 16) | (0x30f08 >> 2),
490 0x00000000,
491 (0x0000 << 16) | (0x30f0c >> 2),
492 0x00000000,
493 (0x0001 << 16) | (0x30f0c >> 2),
494 0x00000000,
495 (0x0600 << 16) | (0x9b7c >> 2),
496 0x00000000,
497 (0x0e00 << 16) | (0x8a14 >> 2),
498 0x00000000,
499 (0x0e00 << 16) | (0x8a18 >> 2),
500 0x00000000,
501 (0x0600 << 16) | (0x30a00 >> 2),
502 0x00000000,
503 (0x0e00 << 16) | (0x8bf0 >> 2),
504 0x00000000,
505 (0x0e00 << 16) | (0x8bcc >> 2),
506 0x00000000,
507 (0x0e00 << 16) | (0x8b24 >> 2),
508 0x00000000,
509 (0x0e00 << 16) | (0x30a04 >> 2),
510 0x00000000,
511 (0x0600 << 16) | (0x30a10 >> 2),
512 0x00000000,
513 (0x0600 << 16) | (0x30a14 >> 2),
514 0x00000000,
515 (0x0600 << 16) | (0x30a18 >> 2),
516 0x00000000,
517 (0x0600 << 16) | (0x30a2c >> 2),
518 0x00000000,
519 (0x0e00 << 16) | (0xc700 >> 2),
520 0x00000000,
521 (0x0e00 << 16) | (0xc704 >> 2),
522 0x00000000,
523 (0x0e00 << 16) | (0xc708 >> 2),
524 0x00000000,
525 (0x0e00 << 16) | (0xc768 >> 2),
526 0x00000000,
527 (0x0400 << 16) | (0xc770 >> 2),
528 0x00000000,
529 (0x0400 << 16) | (0xc774 >> 2),
530 0x00000000,
531 (0x0400 << 16) | (0xc778 >> 2),
532 0x00000000,
533 (0x0400 << 16) | (0xc77c >> 2),
534 0x00000000,
535 (0x0400 << 16) | (0xc780 >> 2),
536 0x00000000,
537 (0x0400 << 16) | (0xc784 >> 2),
538 0x00000000,
539 (0x0400 << 16) | (0xc788 >> 2),
540 0x00000000,
541 (0x0400 << 16) | (0xc78c >> 2),
542 0x00000000,
543 (0x0400 << 16) | (0xc798 >> 2),
544 0x00000000,
545 (0x0400 << 16) | (0xc79c >> 2),
546 0x00000000,
547 (0x0400 << 16) | (0xc7a0 >> 2),
548 0x00000000,
549 (0x0400 << 16) | (0xc7a4 >> 2),
550 0x00000000,
551 (0x0400 << 16) | (0xc7a8 >> 2),
552 0x00000000,
553 (0x0400 << 16) | (0xc7ac >> 2),
554 0x00000000,
555 (0x0400 << 16) | (0xc7b0 >> 2),
556 0x00000000,
557 (0x0400 << 16) | (0xc7b4 >> 2),
558 0x00000000,
559 (0x0e00 << 16) | (0x9100 >> 2),
560 0x00000000,
561 (0x0e00 << 16) | (0x3c010 >> 2),
562 0x00000000,
563 (0x0e00 << 16) | (0x92a8 >> 2),
564 0x00000000,
565 (0x0e00 << 16) | (0x92ac >> 2),
566 0x00000000,
567 (0x0e00 << 16) | (0x92b4 >> 2),
568 0x00000000,
569 (0x0e00 << 16) | (0x92b8 >> 2),
570 0x00000000,
571 (0x0e00 << 16) | (0x92bc >> 2),
572 0x00000000,
573 (0x0e00 << 16) | (0x92c0 >> 2),
574 0x00000000,
575 (0x0e00 << 16) | (0x92c4 >> 2),
576 0x00000000,
577 (0x0e00 << 16) | (0x92c8 >> 2),
578 0x00000000,
579 (0x0e00 << 16) | (0x92cc >> 2),
580 0x00000000,
581 (0x0e00 << 16) | (0x92d0 >> 2),
582 0x00000000,
583 (0x0e00 << 16) | (0x8c00 >> 2),
584 0x00000000,
585 (0x0e00 << 16) | (0x8c04 >> 2),
586 0x00000000,
587 (0x0e00 << 16) | (0x8c20 >> 2),
588 0x00000000,
589 (0x0e00 << 16) | (0x8c38 >> 2),
590 0x00000000,
591 (0x0e00 << 16) | (0x8c3c >> 2),
592 0x00000000,
593 (0x0e00 << 16) | (0xae00 >> 2),
594 0x00000000,
595 (0x0e00 << 16) | (0x9604 >> 2),
596 0x00000000,
597 (0x0e00 << 16) | (0xac08 >> 2),
598 0x00000000,
599 (0x0e00 << 16) | (0xac0c >> 2),
600 0x00000000,
601 (0x0e00 << 16) | (0xac10 >> 2),
602 0x00000000,
603 (0x0e00 << 16) | (0xac14 >> 2),
604 0x00000000,
605 (0x0e00 << 16) | (0xac58 >> 2),
606 0x00000000,
607 (0x0e00 << 16) | (0xac68 >> 2),
608 0x00000000,
609 (0x0e00 << 16) | (0xac6c >> 2),
610 0x00000000,
611 (0x0e00 << 16) | (0xac70 >> 2),
612 0x00000000,
613 (0x0e00 << 16) | (0xac74 >> 2),
614 0x00000000,
615 (0x0e00 << 16) | (0xac78 >> 2),
616 0x00000000,
617 (0x0e00 << 16) | (0xac7c >> 2),
618 0x00000000,
619 (0x0e00 << 16) | (0xac80 >> 2),
620 0x00000000,
621 (0x0e00 << 16) | (0xac84 >> 2),
622 0x00000000,
623 (0x0e00 << 16) | (0xac88 >> 2),
624 0x00000000,
625 (0x0e00 << 16) | (0xac8c >> 2),
626 0x00000000,
627 (0x0e00 << 16) | (0x970c >> 2),
628 0x00000000,
629 (0x0e00 << 16) | (0x9714 >> 2),
630 0x00000000,
631 (0x0e00 << 16) | (0x9718 >> 2),
632 0x00000000,
633 (0x0e00 << 16) | (0x971c >> 2),
634 0x00000000,
635 (0x0e00 << 16) | (0x31068 >> 2),
636 0x00000000,
637 (0x4e00 << 16) | (0x31068 >> 2),
638 0x00000000,
639 (0x5e00 << 16) | (0x31068 >> 2),
640 0x00000000,
641 (0x6e00 << 16) | (0x31068 >> 2),
642 0x00000000,
643 (0x7e00 << 16) | (0x31068 >> 2),
644 0x00000000,
645 (0x8e00 << 16) | (0x31068 >> 2),
646 0x00000000,
647 (0x9e00 << 16) | (0x31068 >> 2),
648 0x00000000,
649 (0xae00 << 16) | (0x31068 >> 2),
650 0x00000000,
651 (0xbe00 << 16) | (0x31068 >> 2),
652 0x00000000,
653 (0x0e00 << 16) | (0xcd10 >> 2),
654 0x00000000,
655 (0x0e00 << 16) | (0xcd14 >> 2),
656 0x00000000,
657 (0x0e00 << 16) | (0x88b0 >> 2),
658 0x00000000,
659 (0x0e00 << 16) | (0x88b4 >> 2),
660 0x00000000,
661 (0x0e00 << 16) | (0x88b8 >> 2),
662 0x00000000,
663 (0x0e00 << 16) | (0x88bc >> 2),
664 0x00000000,
665 (0x0400 << 16) | (0x89c0 >> 2),
666 0x00000000,
667 (0x0e00 << 16) | (0x88c4 >> 2),
668 0x00000000,
669 (0x0e00 << 16) | (0x88c8 >> 2),
670 0x00000000,
671 (0x0e00 << 16) | (0x88d0 >> 2),
672 0x00000000,
673 (0x0e00 << 16) | (0x88d4 >> 2),
674 0x00000000,
675 (0x0e00 << 16) | (0x88d8 >> 2),
676 0x00000000,
677 (0x0e00 << 16) | (0x8980 >> 2),
678 0x00000000,
679 (0x0e00 << 16) | (0x30938 >> 2),
680 0x00000000,
681 (0x0e00 << 16) | (0x3093c >> 2),
682 0x00000000,
683 (0x0e00 << 16) | (0x30940 >> 2),
684 0x00000000,
685 (0x0e00 << 16) | (0x89a0 >> 2),
686 0x00000000,
687 (0x0e00 << 16) | (0x30900 >> 2),
688 0x00000000,
689 (0x0e00 << 16) | (0x30904 >> 2),
690 0x00000000,
691 (0x0e00 << 16) | (0x89b4 >> 2),
692 0x00000000,
693 (0x0e00 << 16) | (0x3c210 >> 2),
694 0x00000000,
695 (0x0e00 << 16) | (0x3c214 >> 2),
696 0x00000000,
697 (0x0e00 << 16) | (0x3c218 >> 2),
698 0x00000000,
699 (0x0e00 << 16) | (0x8904 >> 2),
700 0x00000000,
701 0x5,
702 (0x0e00 << 16) | (0x8c28 >> 2),
703 (0x0e00 << 16) | (0x8c2c >> 2),
704 (0x0e00 << 16) | (0x8c30 >> 2),
705 (0x0e00 << 16) | (0x8c34 >> 2),
706 (0x0e00 << 16) | (0x9600 >> 2),
707};
708
709static const u32 kalindi_rlc_save_restore_register_list[] =
710{
711 (0x0e00 << 16) | (0xc12c >> 2),
712 0x00000000,
713 (0x0e00 << 16) | (0xc140 >> 2),
714 0x00000000,
715 (0x0e00 << 16) | (0xc150 >> 2),
716 0x00000000,
717 (0x0e00 << 16) | (0xc15c >> 2),
718 0x00000000,
719 (0x0e00 << 16) | (0xc168 >> 2),
720 0x00000000,
721 (0x0e00 << 16) | (0xc170 >> 2),
722 0x00000000,
723 (0x0e00 << 16) | (0xc204 >> 2),
724 0x00000000,
725 (0x0e00 << 16) | (0xc2b4 >> 2),
726 0x00000000,
727 (0x0e00 << 16) | (0xc2b8 >> 2),
728 0x00000000,
729 (0x0e00 << 16) | (0xc2bc >> 2),
730 0x00000000,
731 (0x0e00 << 16) | (0xc2c0 >> 2),
732 0x00000000,
733 (0x0e00 << 16) | (0x8228 >> 2),
734 0x00000000,
735 (0x0e00 << 16) | (0x829c >> 2),
736 0x00000000,
737 (0x0e00 << 16) | (0x869c >> 2),
738 0x00000000,
739 (0x0600 << 16) | (0x98f4 >> 2),
740 0x00000000,
741 (0x0e00 << 16) | (0x98f8 >> 2),
742 0x00000000,
743 (0x0e00 << 16) | (0x9900 >> 2),
744 0x00000000,
745 (0x0e00 << 16) | (0xc260 >> 2),
746 0x00000000,
747 (0x0e00 << 16) | (0x90e8 >> 2),
748 0x00000000,
749 (0x0e00 << 16) | (0x3c000 >> 2),
750 0x00000000,
751 (0x0e00 << 16) | (0x3c00c >> 2),
752 0x00000000,
753 (0x0e00 << 16) | (0x8c1c >> 2),
754 0x00000000,
755 (0x0e00 << 16) | (0x9700 >> 2),
756 0x00000000,
757 (0x0e00 << 16) | (0xcd20 >> 2),
758 0x00000000,
759 (0x4e00 << 16) | (0xcd20 >> 2),
760 0x00000000,
761 (0x5e00 << 16) | (0xcd20 >> 2),
762 0x00000000,
763 (0x6e00 << 16) | (0xcd20 >> 2),
764 0x00000000,
765 (0x7e00 << 16) | (0xcd20 >> 2),
766 0x00000000,
767 (0x0e00 << 16) | (0x89bc >> 2),
768 0x00000000,
769 (0x0e00 << 16) | (0x8900 >> 2),
770 0x00000000,
771 0x3,
772 (0x0e00 << 16) | (0xc130 >> 2),
773 0x00000000,
774 (0x0e00 << 16) | (0xc134 >> 2),
775 0x00000000,
776 (0x0e00 << 16) | (0xc1fc >> 2),
777 0x00000000,
778 (0x0e00 << 16) | (0xc208 >> 2),
779 0x00000000,
780 (0x0e00 << 16) | (0xc264 >> 2),
781 0x00000000,
782 (0x0e00 << 16) | (0xc268 >> 2),
783 0x00000000,
784 (0x0e00 << 16) | (0xc26c >> 2),
785 0x00000000,
786 (0x0e00 << 16) | (0xc270 >> 2),
787 0x00000000,
788 (0x0e00 << 16) | (0xc274 >> 2),
789 0x00000000,
790 (0x0e00 << 16) | (0xc28c >> 2),
791 0x00000000,
792 (0x0e00 << 16) | (0xc290 >> 2),
793 0x00000000,
794 (0x0e00 << 16) | (0xc294 >> 2),
795 0x00000000,
796 (0x0e00 << 16) | (0xc298 >> 2),
797 0x00000000,
798 (0x0e00 << 16) | (0xc2a0 >> 2),
799 0x00000000,
800 (0x0e00 << 16) | (0xc2a4 >> 2),
801 0x00000000,
802 (0x0e00 << 16) | (0xc2a8 >> 2),
803 0x00000000,
804 (0x0e00 << 16) | (0xc2ac >> 2),
805 0x00000000,
806 (0x0e00 << 16) | (0x301d0 >> 2),
807 0x00000000,
808 (0x0e00 << 16) | (0x30238 >> 2),
809 0x00000000,
810 (0x0e00 << 16) | (0x30250 >> 2),
811 0x00000000,
812 (0x0e00 << 16) | (0x30254 >> 2),
813 0x00000000,
814 (0x0e00 << 16) | (0x30258 >> 2),
815 0x00000000,
816 (0x0e00 << 16) | (0x3025c >> 2),
817 0x00000000,
818 (0x4e00 << 16) | (0xc900 >> 2),
819 0x00000000,
820 (0x5e00 << 16) | (0xc900 >> 2),
821 0x00000000,
822 (0x6e00 << 16) | (0xc900 >> 2),
823 0x00000000,
824 (0x7e00 << 16) | (0xc900 >> 2),
825 0x00000000,
826 (0x4e00 << 16) | (0xc904 >> 2),
827 0x00000000,
828 (0x5e00 << 16) | (0xc904 >> 2),
829 0x00000000,
830 (0x6e00 << 16) | (0xc904 >> 2),
831 0x00000000,
832 (0x7e00 << 16) | (0xc904 >> 2),
833 0x00000000,
834 (0x4e00 << 16) | (0xc908 >> 2),
835 0x00000000,
836 (0x5e00 << 16) | (0xc908 >> 2),
837 0x00000000,
838 (0x6e00 << 16) | (0xc908 >> 2),
839 0x00000000,
840 (0x7e00 << 16) | (0xc908 >> 2),
841 0x00000000,
842 (0x4e00 << 16) | (0xc90c >> 2),
843 0x00000000,
844 (0x5e00 << 16) | (0xc90c >> 2),
845 0x00000000,
846 (0x6e00 << 16) | (0xc90c >> 2),
847 0x00000000,
848 (0x7e00 << 16) | (0xc90c >> 2),
849 0x00000000,
850 (0x4e00 << 16) | (0xc910 >> 2),
851 0x00000000,
852 (0x5e00 << 16) | (0xc910 >> 2),
853 0x00000000,
854 (0x6e00 << 16) | (0xc910 >> 2),
855 0x00000000,
856 (0x7e00 << 16) | (0xc910 >> 2),
857 0x00000000,
858 (0x0e00 << 16) | (0xc99c >> 2),
859 0x00000000,
860 (0x0e00 << 16) | (0x9834 >> 2),
861 0x00000000,
862 (0x0000 << 16) | (0x30f00 >> 2),
863 0x00000000,
864 (0x0000 << 16) | (0x30f04 >> 2),
865 0x00000000,
866 (0x0000 << 16) | (0x30f08 >> 2),
867 0x00000000,
868 (0x0000 << 16) | (0x30f0c >> 2),
869 0x00000000,
870 (0x0600 << 16) | (0x9b7c >> 2),
871 0x00000000,
872 (0x0e00 << 16) | (0x8a14 >> 2),
873 0x00000000,
874 (0x0e00 << 16) | (0x8a18 >> 2),
875 0x00000000,
876 (0x0600 << 16) | (0x30a00 >> 2),
877 0x00000000,
878 (0x0e00 << 16) | (0x8bf0 >> 2),
879 0x00000000,
880 (0x0e00 << 16) | (0x8bcc >> 2),
881 0x00000000,
882 (0x0e00 << 16) | (0x8b24 >> 2),
883 0x00000000,
884 (0x0e00 << 16) | (0x30a04 >> 2),
885 0x00000000,
886 (0x0600 << 16) | (0x30a10 >> 2),
887 0x00000000,
888 (0x0600 << 16) | (0x30a14 >> 2),
889 0x00000000,
890 (0x0600 << 16) | (0x30a18 >> 2),
891 0x00000000,
892 (0x0600 << 16) | (0x30a2c >> 2),
893 0x00000000,
894 (0x0e00 << 16) | (0xc700 >> 2),
895 0x00000000,
896 (0x0e00 << 16) | (0xc704 >> 2),
897 0x00000000,
898 (0x0e00 << 16) | (0xc708 >> 2),
899 0x00000000,
900 (0x0e00 << 16) | (0xc768 >> 2),
901 0x00000000,
902 (0x0400 << 16) | (0xc770 >> 2),
903 0x00000000,
904 (0x0400 << 16) | (0xc774 >> 2),
905 0x00000000,
906 (0x0400 << 16) | (0xc798 >> 2),
907 0x00000000,
908 (0x0400 << 16) | (0xc79c >> 2),
909 0x00000000,
910 (0x0e00 << 16) | (0x9100 >> 2),
911 0x00000000,
912 (0x0e00 << 16) | (0x3c010 >> 2),
913 0x00000000,
914 (0x0e00 << 16) | (0x8c00 >> 2),
915 0x00000000,
916 (0x0e00 << 16) | (0x8c04 >> 2),
917 0x00000000,
918 (0x0e00 << 16) | (0x8c20 >> 2),
919 0x00000000,
920 (0x0e00 << 16) | (0x8c38 >> 2),
921 0x00000000,
922 (0x0e00 << 16) | (0x8c3c >> 2),
923 0x00000000,
924 (0x0e00 << 16) | (0xae00 >> 2),
925 0x00000000,
926 (0x0e00 << 16) | (0x9604 >> 2),
927 0x00000000,
928 (0x0e00 << 16) | (0xac08 >> 2),
929 0x00000000,
930 (0x0e00 << 16) | (0xac0c >> 2),
931 0x00000000,
932 (0x0e00 << 16) | (0xac10 >> 2),
933 0x00000000,
934 (0x0e00 << 16) | (0xac14 >> 2),
935 0x00000000,
936 (0x0e00 << 16) | (0xac58 >> 2),
937 0x00000000,
938 (0x0e00 << 16) | (0xac68 >> 2),
939 0x00000000,
940 (0x0e00 << 16) | (0xac6c >> 2),
941 0x00000000,
942 (0x0e00 << 16) | (0xac70 >> 2),
943 0x00000000,
944 (0x0e00 << 16) | (0xac74 >> 2),
945 0x00000000,
946 (0x0e00 << 16) | (0xac78 >> 2),
947 0x00000000,
948 (0x0e00 << 16) | (0xac7c >> 2),
949 0x00000000,
950 (0x0e00 << 16) | (0xac80 >> 2),
951 0x00000000,
952 (0x0e00 << 16) | (0xac84 >> 2),
953 0x00000000,
954 (0x0e00 << 16) | (0xac88 >> 2),
955 0x00000000,
956 (0x0e00 << 16) | (0xac8c >> 2),
957 0x00000000,
958 (0x0e00 << 16) | (0x970c >> 2),
959 0x00000000,
960 (0x0e00 << 16) | (0x9714 >> 2),
961 0x00000000,
962 (0x0e00 << 16) | (0x9718 >> 2),
963 0x00000000,
964 (0x0e00 << 16) | (0x971c >> 2),
965 0x00000000,
966 (0x0e00 << 16) | (0x31068 >> 2),
967 0x00000000,
968 (0x4e00 << 16) | (0x31068 >> 2),
969 0x00000000,
970 (0x5e00 << 16) | (0x31068 >> 2),
971 0x00000000,
972 (0x6e00 << 16) | (0x31068 >> 2),
973 0x00000000,
974 (0x7e00 << 16) | (0x31068 >> 2),
975 0x00000000,
976 (0x0e00 << 16) | (0xcd10 >> 2),
977 0x00000000,
978 (0x0e00 << 16) | (0xcd14 >> 2),
979 0x00000000,
980 (0x0e00 << 16) | (0x88b0 >> 2),
981 0x00000000,
982 (0x0e00 << 16) | (0x88b4 >> 2),
983 0x00000000,
984 (0x0e00 << 16) | (0x88b8 >> 2),
985 0x00000000,
986 (0x0e00 << 16) | (0x88bc >> 2),
987 0x00000000,
988 (0x0400 << 16) | (0x89c0 >> 2),
989 0x00000000,
990 (0x0e00 << 16) | (0x88c4 >> 2),
991 0x00000000,
992 (0x0e00 << 16) | (0x88c8 >> 2),
993 0x00000000,
994 (0x0e00 << 16) | (0x88d0 >> 2),
995 0x00000000,
996 (0x0e00 << 16) | (0x88d4 >> 2),
997 0x00000000,
998 (0x0e00 << 16) | (0x88d8 >> 2),
999 0x00000000,
1000 (0x0e00 << 16) | (0x8980 >> 2),
1001 0x00000000,
1002 (0x0e00 << 16) | (0x30938 >> 2),
1003 0x00000000,
1004 (0x0e00 << 16) | (0x3093c >> 2),
1005 0x00000000,
1006 (0x0e00 << 16) | (0x30940 >> 2),
1007 0x00000000,
1008 (0x0e00 << 16) | (0x89a0 >> 2),
1009 0x00000000,
1010 (0x0e00 << 16) | (0x30900 >> 2),
1011 0x00000000,
1012 (0x0e00 << 16) | (0x30904 >> 2),
1013 0x00000000,
1014 (0x0e00 << 16) | (0x89b4 >> 2),
1015 0x00000000,
1016 (0x0e00 << 16) | (0x3e1fc >> 2),
1017 0x00000000,
1018 (0x0e00 << 16) | (0x3c210 >> 2),
1019 0x00000000,
1020 (0x0e00 << 16) | (0x3c214 >> 2),
1021 0x00000000,
1022 (0x0e00 << 16) | (0x3c218 >> 2),
1023 0x00000000,
1024 (0x0e00 << 16) | (0x8904 >> 2),
1025 0x00000000,
1026 0x5,
1027 (0x0e00 << 16) | (0x8c28 >> 2),
1028 (0x0e00 << 16) | (0x8c2c >> 2),
1029 (0x0e00 << 16) | (0x8c30 >> 2),
1030 (0x0e00 << 16) | (0x8c34 >> 2),
1031 (0x0e00 << 16) | (0x9600 >> 2),
1032};
1033
1034static const u32 bonaire_golden_spm_registers[] =
1035{
1036 0x30800, 0xe0ffffff, 0xe0000000
1037};
1038
1039static const u32 bonaire_golden_common_registers[] =
1040{
1041 0xc770, 0xffffffff, 0x00000800,
1042 0xc774, 0xffffffff, 0x00000800,
1043 0xc798, 0xffffffff, 0x00007fbf,
1044 0xc79c, 0xffffffff, 0x00007faf
1045};
1046
1047static const u32 bonaire_golden_registers[] =
1048{
1049 0x3354, 0x00000333, 0x00000333,
1050 0x3350, 0x000c0fc0, 0x00040200,
1051 0x9a10, 0x00010000, 0x00058208,
1052 0x3c000, 0xffff1fff, 0x00140000,
1053 0x3c200, 0xfdfc0fff, 0x00000100,
1054 0x3c234, 0x40000000, 0x40000200,
1055 0x9830, 0xffffffff, 0x00000000,
1056 0x9834, 0xf00fffff, 0x00000400,
1057 0x9838, 0x0002021c, 0x00020200,
1058 0xc78, 0x00000080, 0x00000000,
1059 0x5bb0, 0x000000f0, 0x00000070,
1060 0x5bc0, 0xf0311fff, 0x80300000,
1061 0x98f8, 0x73773777, 0x12010001,
1062 0x350c, 0x00810000, 0x408af000,
1063 0x7030, 0x31000111, 0x00000011,
1064 0x2f48, 0x73773777, 0x12010001,
1065 0x220c, 0x00007fb6, 0x0021a1b1,
1066 0x2210, 0x00007fb6, 0x002021b1,
1067 0x2180, 0x00007fb6, 0x00002191,
1068 0x2218, 0x00007fb6, 0x002121b1,
1069 0x221c, 0x00007fb6, 0x002021b1,
1070 0x21dc, 0x00007fb6, 0x00002191,
1071 0x21e0, 0x00007fb6, 0x00002191,
1072 0x3628, 0x0000003f, 0x0000000a,
1073 0x362c, 0x0000003f, 0x0000000a,
1074 0x2ae4, 0x00073ffe, 0x000022a2,
1075 0x240c, 0x000007ff, 0x00000000,
1076 0x8a14, 0xf000003f, 0x00000007,
1077 0x8bf0, 0x00002001, 0x00000001,
1078 0x8b24, 0xffffffff, 0x00ffffff,
1079 0x30a04, 0x0000ff0f, 0x00000000,
1080 0x28a4c, 0x07ffffff, 0x06000000,
1081 0x4d8, 0x00000fff, 0x00000100,
1082 0x3e78, 0x00000001, 0x00000002,
1083 0x9100, 0x03000000, 0x0362c688,
1084 0x8c00, 0x000000ff, 0x00000001,
1085 0xe40, 0x00001fff, 0x00001fff,
1086 0x9060, 0x0000007f, 0x00000020,
1087 0x9508, 0x00010000, 0x00010000,
1088 0xac14, 0x000003ff, 0x000000f3,
1089 0xac0c, 0xffffffff, 0x00001032
1090};
1091
/*
 * bonaire_mgcg_cgcg_init - first register sequence that
 * cik_init_golden_registers() programs for CHIP_BONAIRE.
 *
 * The table is a flat u32 array written three words per row; presumably
 * each row is {register offset, mask, value} as interpreted by
 * radeon_program_register_sequence() (defined elsewhere) - confirm there.
 * NOTE(review): the name suggests MGCG/CGCG clock-gating setup; that is
 * not verifiable from this part of the file.
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1177
/*
 * spectre_golden_spm_registers - last register sequence that
 * cik_init_golden_registers() programs for CHIP_KAVERI.
 *
 * Single three-word row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1182
/*
 * spectre_golden_common_registers - third register sequence that
 * cik_init_golden_registers() programs for CHIP_KAVERI.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1190
/*
 * spectre_golden_registers - second register sequence that
 * cik_init_golden_registers() programs for CHIP_KAVERI.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1219
/*
 * spectre_mgcg_cgcg_init - first register sequence that
 * cik_init_golden_registers() programs for CHIP_KAVERI.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 * NOTE(review): the name suggests MGCG/CGCG clock-gating setup; that is
 * not verifiable from this part of the file.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1310
/*
 * kalindi_golden_spm_registers - last register sequence that
 * cik_init_golden_registers() programs for both CHIP_KABINI and
 * CHIP_MULLINS.
 *
 * Single three-word row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1315
/*
 * kalindi_golden_common_registers - third register sequence that
 * cik_init_golden_registers() programs for both CHIP_KABINI and
 * CHIP_MULLINS.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1323
/*
 * kalindi_golden_registers - second register sequence that
 * cik_init_golden_registers() programs for CHIP_KABINI (CHIP_MULLINS uses
 * godavari_golden_registers in this slot instead).
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1357
/*
 * kalindi_mgcg_cgcg_init - first register sequence that
 * cik_init_golden_registers() programs for both CHIP_KABINI and
 * CHIP_MULLINS.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 * NOTE(review): the name suggests MGCG/CGCG clock-gating setup; that is
 * not verifiable from this part of the file.
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1416
/*
 * hawaii_golden_spm_registers - last register sequence that
 * cik_init_golden_registers() programs for CHIP_HAWAII.
 *
 * Single three-word row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1421
/*
 * hawaii_golden_common_registers - third register sequence that
 * cik_init_golden_registers() programs for CHIP_HAWAII.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1430
/*
 * hawaii_golden_registers - second register sequence that
 * cik_init_golden_registers() programs for CHIP_HAWAII.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 * NOTE(review): several rows (e.g. 0x9a10, 0x8c00, 0xae00) carry a third
 * word with bits outside the second word; whether that is intentional
 * depends on how the helper applies the mask - verify before editing.
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1470
/*
 * hawaii_mgcg_cgcg_init - first register sequence that
 * cik_init_golden_registers() programs for CHIP_HAWAII.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 * NOTE(review): the name suggests MGCG/CGCG clock-gating setup; that is
 * not verifiable from this part of the file.
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1581
/*
 * godavari_golden_registers - second register sequence that
 * cik_init_golden_registers() programs for CHIP_MULLINS.
 *
 * Three u32 words per row; presumably {register offset, mask, value} as
 * interpreted by radeon_program_register_sequence() - confirm there.
 *
 * The fourth row used to read 0x98302, which is not dword-aligned and so
 * cannot be a valid 32-bit register offset.  The sibling table
 * kalindi_golden_registers programs 0x9834 with the identical mask/value
 * pair directly after 0x9830, so the offset is corrected to 0x9834 here.
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1617
1618
1619static void cik_init_golden_registers(struct radeon_device *rdev)
1620{
1621 switch (rdev->family) {
1622 case CHIP_BONAIRE:
1623 radeon_program_register_sequence(rdev,
1624 bonaire_mgcg_cgcg_init,
1625 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1626 radeon_program_register_sequence(rdev,
1627 bonaire_golden_registers,
1628 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1629 radeon_program_register_sequence(rdev,
1630 bonaire_golden_common_registers,
1631 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1632 radeon_program_register_sequence(rdev,
1633 bonaire_golden_spm_registers,
1634 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1635 break;
1636 case CHIP_KABINI:
1637 radeon_program_register_sequence(rdev,
1638 kalindi_mgcg_cgcg_init,
1639 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1640 radeon_program_register_sequence(rdev,
1641 kalindi_golden_registers,
1642 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1643 radeon_program_register_sequence(rdev,
1644 kalindi_golden_common_registers,
1645 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1646 radeon_program_register_sequence(rdev,
1647 kalindi_golden_spm_registers,
1648 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1649 break;
1650 case CHIP_MULLINS:
1651 radeon_program_register_sequence(rdev,
1652 kalindi_mgcg_cgcg_init,
1653 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1654 radeon_program_register_sequence(rdev,
1655 godavari_golden_registers,
1656 (const u32)ARRAY_SIZE(godavari_golden_registers));
1657 radeon_program_register_sequence(rdev,
1658 kalindi_golden_common_registers,
1659 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1660 radeon_program_register_sequence(rdev,
1661 kalindi_golden_spm_registers,
1662 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1663 break;
1664 case CHIP_KAVERI:
1665 radeon_program_register_sequence(rdev,
1666 spectre_mgcg_cgcg_init,
1667 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1668 radeon_program_register_sequence(rdev,
1669 spectre_golden_registers,
1670 (const u32)ARRAY_SIZE(spectre_golden_registers));
1671 radeon_program_register_sequence(rdev,
1672 spectre_golden_common_registers,
1673 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1674 radeon_program_register_sequence(rdev,
1675 spectre_golden_spm_registers,
1676 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1677 break;
1678 case CHIP_HAWAII:
1679 radeon_program_register_sequence(rdev,
1680 hawaii_mgcg_cgcg_init,
1681 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1682 radeon_program_register_sequence(rdev,
1683 hawaii_golden_registers,
1684 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1685 radeon_program_register_sequence(rdev,
1686 hawaii_golden_common_registers,
1687 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1688 radeon_program_register_sequence(rdev,
1689 hawaii_golden_spm_registers,
1690 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1691 break;
1692 default:
1693 break;
1694 }
1695}
1696
1697/**
1698 * cik_get_xclk - get the xclk
1699 *
1700 * @rdev: radeon_device pointer
1701 *
1702 * Returns the reference clock used by the gfx engine
1703 * (CIK).
1704 */
1705u32 cik_get_xclk(struct radeon_device *rdev)
1706{
1707 u32 reference_clock = rdev->clock.spll.reference_freq;
1708
1709 if (rdev->flags & RADEON_IS_IGP) {
1710 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1711 return reference_clock / 2;
1712 } else {
1713 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1714 return reference_clock / 4;
1715 }
1716 return reference_clock;
1717}
1718
1719/**
1720 * cik_mm_rdoorbell - read a doorbell dword
1721 *
1722 * @rdev: radeon_device pointer
1723 * @index: doorbell index
1724 *
1725 * Returns the value in the doorbell aperture at the
1726 * requested doorbell index (CIK).
1727 */
1728u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1729{
1730 if (index < rdev->doorbell.num_doorbells) {
1731 return readl(rdev->doorbell.ptr + index);
1732 } else {
1733 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1734 return 0;
1735 }
1736}
1737
1738/**
1739 * cik_mm_wdoorbell - write a doorbell dword
1740 *
1741 * @rdev: radeon_device pointer
1742 * @index: doorbell index
1743 * @v: value to write
1744 *
1745 * Writes @v to the doorbell aperture at the
1746 * requested doorbell index (CIK).
1747 */
1748void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1749{
1750 if (index < rdev->doorbell.num_doorbells) {
1751 writel(v, rdev->doorbell.ptr + index);
1752 } else {
1753 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1754 }
1755}
1756
/* Number of {index, data} rows in bonaire_io_mc_regs. */
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC IO debug register pairs used by ci_mc_load_microcode() for
 * CHIP_BONAIRE when the legacy (non-new_fw) firmware is in use.  For each
 * row, element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1798
/* Number of {index, data} rows in hawaii_io_mc_regs. */
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC IO debug register pairs used by ci_mc_load_microcode() for
 * CHIP_HAWAII when the legacy (non-new_fw) firmware is in use.  For each
 * row, element 0 is written to MC_SEQ_IO_DEBUG_INDEX and element 1 to
 * MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1826
1827
1828/**
1829 * cik_srbm_select - select specific register instances
1830 *
1831 * @rdev: radeon_device pointer
1832 * @me: selected ME (micro engine)
1833 * @pipe: pipe
1834 * @queue: queue
1835 * @vmid: VMID
1836 *
1837 * Switches the currently active registers instances. Some
1838 * registers are instanced per VMID, others are instanced per
1839 * me/pipe/queue combination.
1840 */
1841static void cik_srbm_select(struct radeon_device *rdev,
1842 u32 me, u32 pipe, u32 queue, u32 vmid)
1843{
1844 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1845 MEID(me & 0x3) |
1846 VMID(vmid & 0xf) |
1847 QUEUEID(queue & 0x7));
1848 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1849}
1850
1851/* ucode loading */
1852/**
1853 * ci_mc_load_microcode - load MC ucode into the hw
1854 *
1855 * @rdev: radeon_device pointer
1856 *
1857 * Load the GDDR MC ucode into the hw (CIK).
1858 * Returns 0 on success, error on failure.
1859 */
1860int ci_mc_load_microcode(struct radeon_device *rdev)
1861{
1862 const __be32 *fw_data = NULL;
1863 const __le32 *new_fw_data = NULL;
1864 u32 running, tmp;
1865 u32 *io_mc_regs = NULL;
1866 const __le32 *new_io_mc_regs = NULL;
1867 int i, regs_size, ucode_size;
1868
1869 if (!rdev->mc_fw)
1870 return -EINVAL;
1871
1872 if (rdev->new_fw) {
1873 const struct mc_firmware_header_v1_0 *hdr =
1874 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1875
1876 radeon_ucode_print_mc_hdr(&hdr->header);
1877
1878 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1879 new_io_mc_regs = (const __le32 *)
1880 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1881 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1882 new_fw_data = (const __le32 *)
1883 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1884 } else {
1885 ucode_size = rdev->mc_fw->size / 4;
1886
1887 switch (rdev->family) {
1888 case CHIP_BONAIRE:
1889 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1890 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1891 break;
1892 case CHIP_HAWAII:
1893 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1894 regs_size = HAWAII_IO_MC_REGS_SIZE;
1895 break;
1896 default:
1897 return -EINVAL;
1898 }
1899 fw_data = (const __be32 *)rdev->mc_fw->data;
1900 }
1901
1902 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1903
1904 if (running == 0) {
1905 /* reset the engine and set to writable */
1906 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1907 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1908
1909 /* load mc io regs */
1910 for (i = 0; i < regs_size; i++) {
1911 if (rdev->new_fw) {
1912 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1913 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1914 } else {
1915 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1916 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1917 }
1918 }
1919
1920 tmp = RREG32(MC_SEQ_MISC0);
1921 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1922 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1923 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1924 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1925 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1926 }
1927
1928 /* load the MC ucode */
1929 for (i = 0; i < ucode_size; i++) {
1930 if (rdev->new_fw)
1931 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1932 else
1933 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1934 }
1935
1936 /* put the engine back into the active state */
1937 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1938 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1939 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1940
1941 /* wait for training to complete */
1942 for (i = 0; i < rdev->usec_timeout; i++) {
1943 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1944 break;
1945 udelay(1);
1946 }
1947 for (i = 0; i < rdev->usec_timeout; i++) {
1948 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1949 break;
1950 udelay(1);
1951 }
1952 }
1953
1954 return 0;
1955}
1956
1957/**
1958 * cik_init_microcode - load ucode images from disk
1959 *
1960 * @rdev: radeon_device pointer
1961 *
1962 * Use the firmware interface to load the ucode images into
1963 * the driver (not loaded into hw).
1964 * Returns 0 on success, error on failure.
1965 */
1966static int cik_init_microcode(struct radeon_device *rdev)
1967{
1968 const char *chip_name;
1969 const char *new_chip_name;
1970 size_t pfp_req_size, me_req_size, ce_req_size,
1971 mec_req_size, rlc_req_size, mc_req_size = 0,
1972 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1973 char fw_name[30];
1974 int new_fw = 0;
1975 int err;
1976 int num_fw;
1977 bool new_smc = false;
1978
1979 DRM_DEBUG("\n");
1980
1981 switch (rdev->family) {
1982 case CHIP_BONAIRE:
1983 chip_name = "BONAIRE";
1984 if ((rdev->pdev->revision == 0x80) ||
1985 (rdev->pdev->revision == 0x81) ||
1986 (rdev->pdev->device == 0x665f))
1987 new_smc = true;
1988 new_chip_name = "bonaire";
1989 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1990 me_req_size = CIK_ME_UCODE_SIZE * 4;
1991 ce_req_size = CIK_CE_UCODE_SIZE * 4;
1992 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1993 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1994 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1995 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1996 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1997 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1998 num_fw = 8;
1999 break;
2000 case CHIP_HAWAII:
2001 chip_name = "HAWAII";
2002 if (rdev->pdev->revision == 0x80)
2003 new_smc = true;
2004 new_chip_name = "hawaii";
2005 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2006 me_req_size = CIK_ME_UCODE_SIZE * 4;
2007 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2008 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2009 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2010 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2011 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2012 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2013 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2014 num_fw = 8;
2015 break;
2016 case CHIP_KAVERI:
2017 chip_name = "KAVERI";
2018 new_chip_name = "kaveri";
2019 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2020 me_req_size = CIK_ME_UCODE_SIZE * 4;
2021 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2022 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2023 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2024 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2025 num_fw = 7;
2026 break;
2027 case CHIP_KABINI:
2028 chip_name = "KABINI";
2029 new_chip_name = "kabini";
2030 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2031 me_req_size = CIK_ME_UCODE_SIZE * 4;
2032 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2033 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2034 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2035 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2036 num_fw = 6;
2037 break;
2038 case CHIP_MULLINS:
2039 chip_name = "MULLINS";
2040 new_chip_name = "mullins";
2041 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2042 me_req_size = CIK_ME_UCODE_SIZE * 4;
2043 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2044 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2045 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2046 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2047 num_fw = 6;
2048 break;
2049 default: BUG();
2050 }
2051
2052 DRM_INFO("Loading %s Microcode\n", new_chip_name);
2053
2054 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2055 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2056 if (err) {
2057 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2058 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2059 if (err)
2060 goto out;
2061 if (rdev->pfp_fw->size != pfp_req_size) {
2062 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2063 rdev->pfp_fw->size, fw_name);
2064 err = -EINVAL;
2065 goto out;
2066 }
2067 } else {
2068 err = radeon_ucode_validate(rdev->pfp_fw);
2069 if (err) {
2070 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2071 fw_name);
2072 goto out;
2073 } else {
2074 new_fw++;
2075 }
2076 }
2077
2078 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2079 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2080 if (err) {
2081 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2082 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2083 if (err)
2084 goto out;
2085 if (rdev->me_fw->size != me_req_size) {
2086 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2087 rdev->me_fw->size, fw_name);
2088 err = -EINVAL;
2089 }
2090 } else {
2091 err = radeon_ucode_validate(rdev->me_fw);
2092 if (err) {
2093 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2094 fw_name);
2095 goto out;
2096 } else {
2097 new_fw++;
2098 }
2099 }
2100
2101 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2102 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2103 if (err) {
2104 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2105 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2106 if (err)
2107 goto out;
2108 if (rdev->ce_fw->size != ce_req_size) {
2109 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2110 rdev->ce_fw->size, fw_name);
2111 err = -EINVAL;
2112 }
2113 } else {
2114 err = radeon_ucode_validate(rdev->ce_fw);
2115 if (err) {
2116 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2117 fw_name);
2118 goto out;
2119 } else {
2120 new_fw++;
2121 }
2122 }
2123
2124 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2125 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2126 if (err) {
2127 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2128 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2129 if (err)
2130 goto out;
2131 if (rdev->mec_fw->size != mec_req_size) {
2132 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2133 rdev->mec_fw->size, fw_name);
2134 err = -EINVAL;
2135 }
2136 } else {
2137 err = radeon_ucode_validate(rdev->mec_fw);
2138 if (err) {
2139 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2140 fw_name);
2141 goto out;
2142 } else {
2143 new_fw++;
2144 }
2145 }
2146
2147 if (rdev->family == CHIP_KAVERI) {
2148 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2149 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2150 if (err) {
2151 goto out;
2152 } else {
2153 err = radeon_ucode_validate(rdev->mec2_fw);
2154 if (err) {
2155 goto out;
2156 } else {
2157 new_fw++;
2158 }
2159 }
2160 }
2161
2162 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2163 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2164 if (err) {
2165 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2166 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2167 if (err)
2168 goto out;
2169 if (rdev->rlc_fw->size != rlc_req_size) {
2170 pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2171 rdev->rlc_fw->size, fw_name);
2172 err = -EINVAL;
2173 }
2174 } else {
2175 err = radeon_ucode_validate(rdev->rlc_fw);
2176 if (err) {
2177 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2178 fw_name);
2179 goto out;
2180 } else {
2181 new_fw++;
2182 }
2183 }
2184
2185 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2186 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2187 if (err) {
2188 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2189 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2190 if (err)
2191 goto out;
2192 if (rdev->sdma_fw->size != sdma_req_size) {
2193 pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2194 rdev->sdma_fw->size, fw_name);
2195 err = -EINVAL;
2196 }
2197 } else {
2198 err = radeon_ucode_validate(rdev->sdma_fw);
2199 if (err) {
2200 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2201 fw_name);
2202 goto out;
2203 } else {
2204 new_fw++;
2205 }
2206 }
2207
2208 /* No SMC, MC ucode on APUs */
2209 if (!(rdev->flags & RADEON_IS_IGP)) {
2210 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2211 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2212 if (err) {
2213 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2214 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2215 if (err) {
2216 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2217 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2218 if (err)
2219 goto out;
2220 }
2221 if ((rdev->mc_fw->size != mc_req_size) &&
2222 (rdev->mc_fw->size != mc2_req_size)){
2223 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2224 rdev->mc_fw->size, fw_name);
2225 err = -EINVAL;
2226 }
2227 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2228 } else {
2229 err = radeon_ucode_validate(rdev->mc_fw);
2230 if (err) {
2231 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2232 fw_name);
2233 goto out;
2234 } else {
2235 new_fw++;
2236 }
2237 }
2238
2239 if (new_smc)
2240 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2241 else
2242 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2243 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2244 if (err) {
2245 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2246 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2247 if (err) {
2248 pr_err("smc: error loading firmware \"%s\"\n",
2249 fw_name);
2250 release_firmware(rdev->smc_fw);
2251 rdev->smc_fw = NULL;
2252 err = 0;
2253 } else if (rdev->smc_fw->size != smc_req_size) {
2254 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2255 rdev->smc_fw->size, fw_name);
2256 err = -EINVAL;
2257 }
2258 } else {
2259 err = radeon_ucode_validate(rdev->smc_fw);
2260 if (err) {
2261 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2262 fw_name);
2263 goto out;
2264 } else {
2265 new_fw++;
2266 }
2267 }
2268 }
2269
2270 if (new_fw == 0) {
2271 rdev->new_fw = false;
2272 } else if (new_fw < num_fw) {
2273 pr_err("ci_fw: mixing new and old firmware!\n");
2274 err = -EINVAL;
2275 } else {
2276 rdev->new_fw = true;
2277 }
2278
2279out:
2280 if (err) {
2281 if (err != -EINVAL)
2282 pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2283 fw_name);
2284 release_firmware(rdev->pfp_fw);
2285 rdev->pfp_fw = NULL;
2286 release_firmware(rdev->me_fw);
2287 rdev->me_fw = NULL;
2288 release_firmware(rdev->ce_fw);
2289 rdev->ce_fw = NULL;
2290 release_firmware(rdev->mec_fw);
2291 rdev->mec_fw = NULL;
2292 release_firmware(rdev->mec2_fw);
2293 rdev->mec2_fw = NULL;
2294 release_firmware(rdev->rlc_fw);
2295 rdev->rlc_fw = NULL;
2296 release_firmware(rdev->sdma_fw);
2297 rdev->sdma_fw = NULL;
2298 release_firmware(rdev->mc_fw);
2299 rdev->mc_fw = NULL;
2300 release_firmware(rdev->smc_fw);
2301 rdev->smc_fw = NULL;
2302 }
2303 return err;
2304}
2305
2306/*
2307 * Core functions
2308 */
2309/**
2310 * cik_tiling_mode_table_init - init the hw tiling table
2311 *
2312 * @rdev: radeon_device pointer
2313 *
2314 * Starting with SI, the tiling setup is done globally in a
2315 * set of 32 tiling modes. Rather than selecting each set of
2316 * parameters per surface as on older asics, we just select
2317 * which index in the tiling table we want to use, and the
2318 * surface uses those parameters (CIK).
2319 */
2320static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2321{
2322 u32 *tile = rdev->config.cik.tile_mode_array;
2323 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2324 const u32 num_tile_mode_states =
2325 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2326 const u32 num_secondary_tile_mode_states =
2327 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2328 u32 reg_offset, split_equal_to_row_size;
2329 u32 num_pipe_configs;
2330 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2331 rdev->config.cik.max_shader_engines;
2332
2333 switch (rdev->config.cik.mem_row_size_in_kb) {
2334 case 1:
2335 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2336 break;
2337 case 2:
2338 default:
2339 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2340 break;
2341 case 4:
2342 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2343 break;
2344 }
2345
2346 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2347 if (num_pipe_configs > 8)
2348 num_pipe_configs = 16;
2349
2350 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2351 tile[reg_offset] = 0;
2352 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2353 macrotile[reg_offset] = 0;
2354
2355 switch(num_pipe_configs) {
2356 case 16:
2357 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2360 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2361 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2364 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2365 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2367 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2368 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2369 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2372 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2373 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2376 TILE_SPLIT(split_equal_to_row_size));
2377 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2380 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2384 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 TILE_SPLIT(split_equal_to_row_size));
2388 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2390 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2393 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2397 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2405 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2408 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2413 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2414 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2416 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2420 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2423 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2424 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2427 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2435
2436 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2439 NUM_BANKS(ADDR_SURF_16_BANK));
2440 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2443 NUM_BANKS(ADDR_SURF_16_BANK));
2444 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2447 NUM_BANKS(ADDR_SURF_16_BANK));
2448 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2451 NUM_BANKS(ADDR_SURF_16_BANK));
2452 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2455 NUM_BANKS(ADDR_SURF_8_BANK));
2456 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2459 NUM_BANKS(ADDR_SURF_4_BANK));
2460 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2463 NUM_BANKS(ADDR_SURF_2_BANK));
2464 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2467 NUM_BANKS(ADDR_SURF_16_BANK));
2468 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2471 NUM_BANKS(ADDR_SURF_16_BANK));
2472 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2475 NUM_BANKS(ADDR_SURF_16_BANK));
2476 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2479 NUM_BANKS(ADDR_SURF_8_BANK));
2480 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2482 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2483 NUM_BANKS(ADDR_SURF_4_BANK));
2484 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2487 NUM_BANKS(ADDR_SURF_2_BANK));
2488 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2491 NUM_BANKS(ADDR_SURF_2_BANK));
2492
2493 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2494 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2495 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2496 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2497 break;
2498
2499 case 8:
2500 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2502 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2503 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2504 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2506 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2507 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2508 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2510 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2512 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2514 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2515 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2516 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2519 TILE_SPLIT(split_equal_to_row_size));
2520 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2524 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2527 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2528 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 TILE_SPLIT(split_equal_to_row_size));
2531 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2533 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2536 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2544 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2545 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2548 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2551 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2552 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2554 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2555 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2557 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2559 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2561 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2563 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2566 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2576 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2578
2579 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2582 NUM_BANKS(ADDR_SURF_16_BANK));
2583 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2586 NUM_BANKS(ADDR_SURF_16_BANK));
2587 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2590 NUM_BANKS(ADDR_SURF_16_BANK));
2591 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2594 NUM_BANKS(ADDR_SURF_16_BANK));
2595 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2597 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2598 NUM_BANKS(ADDR_SURF_8_BANK));
2599 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2600 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2601 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2602 NUM_BANKS(ADDR_SURF_4_BANK));
2603 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2604 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2605 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2606 NUM_BANKS(ADDR_SURF_2_BANK));
2607 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2608 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2609 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2610 NUM_BANKS(ADDR_SURF_16_BANK));
2611 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2613 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2614 NUM_BANKS(ADDR_SURF_16_BANK));
2615 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2618 NUM_BANKS(ADDR_SURF_16_BANK));
2619 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2620 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2621 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2622 NUM_BANKS(ADDR_SURF_16_BANK));
2623 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2625 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2626 NUM_BANKS(ADDR_SURF_8_BANK));
2627 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2629 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2630 NUM_BANKS(ADDR_SURF_4_BANK));
2631 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2632 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2633 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2634 NUM_BANKS(ADDR_SURF_2_BANK));
2635
2636 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2637 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2638 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2639 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2640 break;
2641
2642 case 4:
2643 if (num_rbs == 4) {
2644 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2646 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2647 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2648 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2650 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2651 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2652 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2654 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2655 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2656 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2657 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2658 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2659 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2660 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2663 TILE_SPLIT(split_equal_to_row_size));
2664 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2665 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2666 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2667 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2669 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2670 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2671 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2673 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2674 TILE_SPLIT(split_equal_to_row_size));
2675 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2676 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2677 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2680 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2684 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2686 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2688 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2690 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2693 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2694 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2695 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2699 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2701 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2707 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2710 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2716 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2718 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2722
2723 } else if (num_rbs < 4) {
2724 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2725 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2726 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2727 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2728 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2730 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2732 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2733 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2734 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2735 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2736 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2737 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2738 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2739 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2740 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2741 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 TILE_SPLIT(split_equal_to_row_size));
2744 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2745 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2747 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2751 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2753 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2754 TILE_SPLIT(split_equal_to_row_size));
2755 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2756 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2757 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2760 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2761 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2762 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2764 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2765 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2766 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2768 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2770 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2773 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2775 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2778 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2779 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2781 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2783 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2785 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2787 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2788 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2790 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2791 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2792 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2794 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2795 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2796 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2798 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2799 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2800 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2802 }
2803
2804 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2807 NUM_BANKS(ADDR_SURF_16_BANK));
2808 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2811 NUM_BANKS(ADDR_SURF_16_BANK));
2812 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2813 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2814 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2815 NUM_BANKS(ADDR_SURF_16_BANK));
2816 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2819 NUM_BANKS(ADDR_SURF_16_BANK));
2820 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2823 NUM_BANKS(ADDR_SURF_16_BANK));
2824 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2827 NUM_BANKS(ADDR_SURF_8_BANK));
2828 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2829 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2830 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2831 NUM_BANKS(ADDR_SURF_4_BANK));
2832 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 NUM_BANKS(ADDR_SURF_16_BANK));
2836 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2839 NUM_BANKS(ADDR_SURF_16_BANK));
2840 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2841 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2842 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2843 NUM_BANKS(ADDR_SURF_16_BANK));
2844 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 NUM_BANKS(ADDR_SURF_16_BANK));
2848 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2849 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2850 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2851 NUM_BANKS(ADDR_SURF_16_BANK));
2852 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 NUM_BANKS(ADDR_SURF_8_BANK));
2856 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2859 NUM_BANKS(ADDR_SURF_4_BANK));
2860
2861 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2862 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2863 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2864 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2865 break;
2866
2867 case 2:
2868 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2869 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2870 PIPE_CONFIG(ADDR_SURF_P2) |
2871 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2872 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2874 PIPE_CONFIG(ADDR_SURF_P2) |
2875 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2876 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2877 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2878 PIPE_CONFIG(ADDR_SURF_P2) |
2879 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2880 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2881 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2882 PIPE_CONFIG(ADDR_SURF_P2) |
2883 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2884 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 PIPE_CONFIG(ADDR_SURF_P2) |
2887 TILE_SPLIT(split_equal_to_row_size));
2888 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P2) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2891 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2892 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2893 PIPE_CONFIG(ADDR_SURF_P2) |
2894 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2895 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2896 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2897 PIPE_CONFIG(ADDR_SURF_P2) |
2898 TILE_SPLIT(split_equal_to_row_size));
2899 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2900 PIPE_CONFIG(ADDR_SURF_P2);
2901 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2903 PIPE_CONFIG(ADDR_SURF_P2));
2904 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2906 PIPE_CONFIG(ADDR_SURF_P2) |
2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2908 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2910 PIPE_CONFIG(ADDR_SURF_P2) |
2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2912 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2914 PIPE_CONFIG(ADDR_SURF_P2) |
2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2917 PIPE_CONFIG(ADDR_SURF_P2) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2919 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2921 PIPE_CONFIG(ADDR_SURF_P2) |
2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2923 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2924 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2925 PIPE_CONFIG(ADDR_SURF_P2) |
2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2928 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2929 PIPE_CONFIG(ADDR_SURF_P2) |
2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2931 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2932 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2933 PIPE_CONFIG(ADDR_SURF_P2));
2934 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2935 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2936 PIPE_CONFIG(ADDR_SURF_P2) |
2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2940 PIPE_CONFIG(ADDR_SURF_P2) |
2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2942 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2943 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2944 PIPE_CONFIG(ADDR_SURF_P2) |
2945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2946
2947 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2950 NUM_BANKS(ADDR_SURF_16_BANK));
2951 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2954 NUM_BANKS(ADDR_SURF_16_BANK));
2955 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2959 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2960 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2961 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2962 NUM_BANKS(ADDR_SURF_16_BANK));
2963 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2966 NUM_BANKS(ADDR_SURF_16_BANK));
2967 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2970 NUM_BANKS(ADDR_SURF_16_BANK));
2971 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2974 NUM_BANKS(ADDR_SURF_8_BANK));
2975 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2978 NUM_BANKS(ADDR_SURF_16_BANK));
2979 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2982 NUM_BANKS(ADDR_SURF_16_BANK));
2983 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2986 NUM_BANKS(ADDR_SURF_16_BANK));
2987 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2990 NUM_BANKS(ADDR_SURF_16_BANK));
2991 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2992 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2993 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2994 NUM_BANKS(ADDR_SURF_16_BANK));
2995 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2998 NUM_BANKS(ADDR_SURF_16_BANK));
2999 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3000 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3001 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3002 NUM_BANKS(ADDR_SURF_8_BANK));
3003
3004 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3005 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3006 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3007 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3008 break;
3009
3010 default:
3011 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3012 }
3013}
3014
3015/**
3016 * cik_select_se_sh - select which SE, SH to address
3017 *
3018 * @rdev: radeon_device pointer
3019 * @se_num: shader engine to address
3020 * @sh_num: sh block to address
3021 *
3022 * Select which SE, SH combinations to address. Certain
3023 * registers are instanced per SE or SH. 0xffffffff means
3024 * broadcast to all SEs or SHs (CIK).
3025 */
3026static void cik_select_se_sh(struct radeon_device *rdev,
3027 u32 se_num, u32 sh_num)
3028{
3029 u32 data = INSTANCE_BROADCAST_WRITES;
3030
3031 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3032 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3033 else if (se_num == 0xffffffff)
3034 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3035 else if (sh_num == 0xffffffff)
3036 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3037 else
3038 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3039 WREG32(GRBM_GFX_INDEX, data);
3040}
3041
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Builds a mask with the low @bit_width bits set (CIK).  A width of
 * zero yields 0; any width of 32 or more yields 0xffffffff.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined; saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3060
3061/**
3062 * cik_get_rb_disabled - computes the mask of disabled RBs
3063 *
3064 * @rdev: radeon_device pointer
3065 * @max_rb_num_per_se: max RBs (render backends) per SE (shader engine) for the asic
3066 * @sh_per_se: number of SH blocks per SE for the asic
3067 *
3068 * Calculates the bitmask of disabled RBs (CIK).
3069 * Returns the disabled RB bitmask.
3070 */
3071static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3072 u32 max_rb_num_per_se,
3073 u32 sh_per_se)
3074{
3075 u32 data, mask;
3076
3077 data = RREG32(CC_RB_BACKEND_DISABLE);
3078 if (data & 1)
3079 data &= BACKEND_DISABLE_MASK;
3080 else
3081 data = 0;
3082 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3083
3084 data >>= BACKEND_DISABLE_SHIFT;
3085
3086 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3087
3088 return data & mask;
3089}
3090
3091/**
3092 * cik_setup_rb - setup the RBs on the asic
3093 *
3094 * @rdev: radeon_device pointer
3095 * @se_num: number of SEs (shader engines) for the asic
3096 * @sh_per_se: number of SH blocks per SE for the asic
3097 * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3098 *
3099 * Configures per-SE/SH RB registers (CIK).
3100 */
3101static void cik_setup_rb(struct radeon_device *rdev,
3102 u32 se_num, u32 sh_per_se,
3103 u32 max_rb_num_per_se)
3104{
3105 int i, j;
3106 u32 data, mask;
3107 u32 disabled_rbs = 0;
3108 u32 enabled_rbs = 0;
3109
3110 for (i = 0; i < se_num; i++) {
3111 for (j = 0; j < sh_per_se; j++) {
3112 cik_select_se_sh(rdev, i, j);
3113 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3114 if (rdev->family == CHIP_HAWAII)
3115 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3116 else
3117 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3118 }
3119 }
3120 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3121
3122 mask = 1;
3123 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3124 if (!(disabled_rbs & mask))
3125 enabled_rbs |= mask;
3126 mask <<= 1;
3127 }
3128
3129 rdev->config.cik.backend_enable_mask = enabled_rbs;
3130
3131 for (i = 0; i < se_num; i++) {
3132 cik_select_se_sh(rdev, i, 0xffffffff);
3133 data = 0;
3134 for (j = 0; j < sh_per_se; j++) {
3135 switch (enabled_rbs & 3) {
3136 case 0:
3137 if (j == 0)
3138 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3139 else
3140 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3141 break;
3142 case 1:
3143 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3144 break;
3145 case 2:
3146 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3147 break;
3148 case 3:
3149 default:
3150 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3151 break;
3152 }
3153 enabled_rbs >>= 2;
3154 }
3155 WREG32(PA_SC_RASTER_CONFIG, data);
3156 }
3157 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3158}
3159
3160/**
3161 * cik_gpu_init - setup the 3D engine
3162 *
3163 * @rdev: radeon_device pointer
3164 *
3165 * Configures the 3D engine and tiling configuration
3166 * registers so that the 3D engine is usable.
3167 */
3168static void cik_gpu_init(struct radeon_device *rdev)
3169{
3170 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3171 u32 mc_arb_ramcfg;
3172 u32 hdp_host_path_cntl;
3173 u32 tmp;
3174 int i, j;
3175
3176 switch (rdev->family) {
3177 case CHIP_BONAIRE:
3178 rdev->config.cik.max_shader_engines = 2;
3179 rdev->config.cik.max_tile_pipes = 4;
3180 rdev->config.cik.max_cu_per_sh = 7;
3181 rdev->config.cik.max_sh_per_se = 1;
3182 rdev->config.cik.max_backends_per_se = 2;
3183 rdev->config.cik.max_texture_channel_caches = 4;
3184 rdev->config.cik.max_gprs = 256;
3185 rdev->config.cik.max_gs_threads = 32;
3186 rdev->config.cik.max_hw_contexts = 8;
3187
3188 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3189 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3190 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3191 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3192 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3193 break;
3194 case CHIP_HAWAII:
3195 rdev->config.cik.max_shader_engines = 4;
3196 rdev->config.cik.max_tile_pipes = 16;
3197 rdev->config.cik.max_cu_per_sh = 11;
3198 rdev->config.cik.max_sh_per_se = 1;
3199 rdev->config.cik.max_backends_per_se = 4;
3200 rdev->config.cik.max_texture_channel_caches = 16;
3201 rdev->config.cik.max_gprs = 256;
3202 rdev->config.cik.max_gs_threads = 32;
3203 rdev->config.cik.max_hw_contexts = 8;
3204
3205 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3206 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3207 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3208 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3209 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3210 break;
3211 case CHIP_KAVERI:
3212 rdev->config.cik.max_shader_engines = 1;
3213 rdev->config.cik.max_tile_pipes = 4;
3214 rdev->config.cik.max_cu_per_sh = 8;
3215 rdev->config.cik.max_backends_per_se = 2;
3216 rdev->config.cik.max_sh_per_se = 1;
3217 rdev->config.cik.max_texture_channel_caches = 4;
3218 rdev->config.cik.max_gprs = 256;
3219 rdev->config.cik.max_gs_threads = 16;
3220 rdev->config.cik.max_hw_contexts = 8;
3221
3222 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3223 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3224 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3225 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3226 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3227 break;
3228 case CHIP_KABINI:
3229 case CHIP_MULLINS:
3230 default:
3231 rdev->config.cik.max_shader_engines = 1;
3232 rdev->config.cik.max_tile_pipes = 2;
3233 rdev->config.cik.max_cu_per_sh = 2;
3234 rdev->config.cik.max_sh_per_se = 1;
3235 rdev->config.cik.max_backends_per_se = 1;
3236 rdev->config.cik.max_texture_channel_caches = 2;
3237 rdev->config.cik.max_gprs = 256;
3238 rdev->config.cik.max_gs_threads = 16;
3239 rdev->config.cik.max_hw_contexts = 8;
3240
3241 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3242 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3243 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3244 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3245 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3246 break;
3247 }
3248
3249 /* Initialize HDP */
3250 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3251 WREG32((0x2c14 + j), 0x00000000);
3252 WREG32((0x2c18 + j), 0x00000000);
3253 WREG32((0x2c1c + j), 0x00000000);
3254 WREG32((0x2c20 + j), 0x00000000);
3255 WREG32((0x2c24 + j), 0x00000000);
3256 }
3257
3258 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3259 WREG32(SRBM_INT_CNTL, 0x1);
3260 WREG32(SRBM_INT_ACK, 0x1);
3261
3262 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3263
3264 RREG32(MC_SHARED_CHMAP);
3265 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3266
3267 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3268 rdev->config.cik.mem_max_burst_length_bytes = 256;
3269 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3270 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3271 if (rdev->config.cik.mem_row_size_in_kb > 4)
3272 rdev->config.cik.mem_row_size_in_kb = 4;
3273 /* XXX use MC settings? */
3274 rdev->config.cik.shader_engine_tile_size = 32;
3275 rdev->config.cik.num_gpus = 1;
3276 rdev->config.cik.multi_gpu_tile_size = 64;
3277
3278 /* fix up row size */
3279 gb_addr_config &= ~ROW_SIZE_MASK;
3280 switch (rdev->config.cik.mem_row_size_in_kb) {
3281 case 1:
3282 default:
3283 gb_addr_config |= ROW_SIZE(0);
3284 break;
3285 case 2:
3286 gb_addr_config |= ROW_SIZE(1);
3287 break;
3288 case 4:
3289 gb_addr_config |= ROW_SIZE(2);
3290 break;
3291 }
3292
3293 /* setup tiling info dword. gb_addr_config is not adequate since it does
3294 * not have bank info, so create a custom tiling dword.
3295 * bits 3:0 num_pipes
3296 * bits 7:4 num_banks
3297 * bits 11:8 group_size
3298 * bits 15:12 row_size
3299 */
3300 rdev->config.cik.tile_config = 0;
3301 switch (rdev->config.cik.num_tile_pipes) {
3302 case 1:
3303 rdev->config.cik.tile_config |= (0 << 0);
3304 break;
3305 case 2:
3306 rdev->config.cik.tile_config |= (1 << 0);
3307 break;
3308 case 4:
3309 rdev->config.cik.tile_config |= (2 << 0);
3310 break;
3311 case 8:
3312 default:
3313 /* XXX what about 12? */
3314 rdev->config.cik.tile_config |= (3 << 0);
3315 break;
3316 }
3317 rdev->config.cik.tile_config |=
3318 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3319 rdev->config.cik.tile_config |=
3320 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3321 rdev->config.cik.tile_config |=
3322 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3323
3324 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3325 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3326 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3327 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3328 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3329 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3330 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3331 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3332
3333 cik_tiling_mode_table_init(rdev);
3334
3335 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3336 rdev->config.cik.max_sh_per_se,
3337 rdev->config.cik.max_backends_per_se);
3338
3339 rdev->config.cik.active_cus = 0;
3340 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3341 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3342 rdev->config.cik.active_cus +=
3343 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3344 }
3345 }
3346
3347 /* set HW defaults for 3D engine */
3348 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3349
3350 WREG32(SX_DEBUG_1, 0x20);
3351
3352 WREG32(TA_CNTL_AUX, 0x00010000);
3353
3354 tmp = RREG32(SPI_CONFIG_CNTL);
3355 tmp |= 0x03000000;
3356 WREG32(SPI_CONFIG_CNTL, tmp);
3357
3358 WREG32(SQ_CONFIG, 1);
3359
3360 WREG32(DB_DEBUG, 0);
3361
3362 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3363 tmp |= 0x00000400;
3364 WREG32(DB_DEBUG2, tmp);
3365
3366 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3367 tmp |= 0x00020200;
3368 WREG32(DB_DEBUG3, tmp);
3369
3370 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3371 tmp |= 0x00018208;
3372 WREG32(CB_HW_CONTROL, tmp);
3373
3374 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3375
3376 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3377 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3378 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3379 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3380
3381 WREG32(VGT_NUM_INSTANCES, 1);
3382
3383 WREG32(CP_PERFMON_CNTL, 0);
3384
3385 WREG32(SQ_CONFIG, 0);
3386
3387 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3388 FORCE_EOV_MAX_REZ_CNT(255)));
3389
3390 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3391 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3392
3393 WREG32(VGT_GS_VERTEX_REUSE, 16);
3394 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3395
3396 tmp = RREG32(HDP_MISC_CNTL);
3397 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3398 WREG32(HDP_MISC_CNTL, tmp);
3399
3400 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3401 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3402
3403 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3404 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3405
3406 udelay(50);
3407}
3408
/*
 * GPU scratch register helper functions.
 */
3412/**
3413 * cik_scratch_init - setup driver info for CP scratch regs
3414 *
3415 * @rdev: radeon_device pointer
3416 *
3417 * Set up the number and offset of the CP scratch registers.
3418 * NOTE: use of CP scratch registers is a legacy inferface and
3419 * is not used by default on newer asics (r6xx+). On newer asics,
3420 * memory buffers are used for fences rather than scratch regs.
3421 */
3422static void cik_scratch_init(struct radeon_device *rdev)
3423{
3424 int i;
3425
3426 rdev->scratch.num_reg = 7;
3427 rdev->scratch.reg_base = SCRATCH_REG0;
3428 for (i = 0; i < rdev->scratch.num_reg; i++) {
3429 rdev->scratch.free[i] = true;
3430 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3431 }
3432}
3433
3434/**
3435 * cik_ring_test - basic gfx ring test
3436 *
3437 * @rdev: radeon_device pointer
3438 * @ring: radeon_ring structure holding ring information
3439 *
3440 * Allocate a scratch register and write to it using the gfx ring (CIK).
3441 * Provides a basic gfx ring test to verify that the ring is working.
3442 * Used by cik_cp_gfx_resume();
3443 * Returns 0 on success, error on failure.
3444 */
3445int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3446{
3447 uint32_t scratch;
3448 uint32_t tmp = 0;
3449 unsigned i;
3450 int r;
3451
3452 r = radeon_scratch_get(rdev, &scratch);
3453 if (r) {
3454 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3455 return r;
3456 }
3457 WREG32(scratch, 0xCAFEDEAD);
3458 r = radeon_ring_lock(rdev, ring, 3);
3459 if (r) {
3460 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3461 radeon_scratch_free(rdev, scratch);
3462 return r;
3463 }
3464 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3465 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3466 radeon_ring_write(ring, 0xDEADBEEF);
3467 radeon_ring_unlock_commit(rdev, ring, false);
3468
3469 for (i = 0; i < rdev->usec_timeout; i++) {
3470 tmp = RREG32(scratch);
3471 if (tmp == 0xDEADBEEF)
3472 break;
3473 udelay(1);
3474 }
3475 if (i < rdev->usec_timeout) {
3476 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3477 } else {
3478 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3479 ring->idx, scratch, tmp);
3480 r = -EINVAL;
3481 }
3482 radeon_scratch_free(rdev, scratch);
3483 return r;
3484}
3485
3486/**
3487 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3488 *
3489 * @rdev: radeon_device pointer
3490 * @ridx: radeon ring index
3491 *
3492 * Emits an hdp flush on the cp.
3493 */
3494static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3495 int ridx)
3496{
3497 struct radeon_ring *ring = &rdev->ring[ridx];
3498 u32 ref_and_mask;
3499
3500 switch (ring->idx) {
3501 case CAYMAN_RING_TYPE_CP1_INDEX:
3502 case CAYMAN_RING_TYPE_CP2_INDEX:
3503 default:
3504 switch (ring->me) {
3505 case 0:
3506 ref_and_mask = CP2 << ring->pipe;
3507 break;
3508 case 1:
3509 ref_and_mask = CP6 << ring->pipe;
3510 break;
3511 default:
3512 return;
3513 }
3514 break;
3515 case RADEON_RING_TYPE_GFX_INDEX:
3516 ref_and_mask = CP0;
3517 break;
3518 }
3519
3520 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3521 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3522 WAIT_REG_MEM_FUNCTION(3) | /* == */
3523 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3524 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3525 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3526 radeon_ring_write(ring, ref_and_mask);
3527 radeon_ring_write(ring, ref_and_mask);
3528 radeon_ring_write(ring, 0x20); /* poll interval */
3529}
3530
3531/**
3532 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3533 *
3534 * @rdev: radeon_device pointer
3535 * @fence: radeon fence object
3536 *
3537 * Emits a fence sequnce number on the gfx ring and flushes
3538 * GPU caches.
3539 */
3540void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3541 struct radeon_fence *fence)
3542{
3543 struct radeon_ring *ring = &rdev->ring[fence->ring];
3544 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3545
3546 /* Workaround for cache flush problems. First send a dummy EOP
3547 * event down the pipe with seq one below.
3548 */
3549 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3550 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3551 EOP_TC_ACTION_EN |
3552 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3553 EVENT_INDEX(5)));
3554 radeon_ring_write(ring, addr & 0xfffffffc);
3555 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3556 DATA_SEL(1) | INT_SEL(0));
3557 radeon_ring_write(ring, fence->seq - 1);
3558 radeon_ring_write(ring, 0);
3559
3560 /* Then send the real EOP event down the pipe. */
3561 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3562 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3563 EOP_TC_ACTION_EN |
3564 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3565 EVENT_INDEX(5)));
3566 radeon_ring_write(ring, addr & 0xfffffffc);
3567 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3568 radeon_ring_write(ring, fence->seq);
3569 radeon_ring_write(ring, 0);
3570}
3571
3572/**
3573 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3574 *
3575 * @rdev: radeon_device pointer
3576 * @fence: radeon fence object
3577 *
3578 * Emits a fence sequnce number on the compute ring and flushes
3579 * GPU caches.
3580 */
3581void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3582 struct radeon_fence *fence)
3583{
3584 struct radeon_ring *ring = &rdev->ring[fence->ring];
3585 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3586
3587 /* RELEASE_MEM - flush caches, send int */
3588 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3589 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3590 EOP_TC_ACTION_EN |
3591 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3592 EVENT_INDEX(5)));
3593 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3594 radeon_ring_write(ring, addr & 0xfffffffc);
3595 radeon_ring_write(ring, upper_32_bits(addr));
3596 radeon_ring_write(ring, fence->seq);
3597 radeon_ring_write(ring, 0);
3598}
3599
3600/**
3601 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3602 *
3603 * @rdev: radeon_device pointer
3604 * @ring: radeon ring buffer object
3605 * @semaphore: radeon semaphore object
3606 * @emit_wait: Is this a semaphore wait?
3607 *
3608 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3609 * from running ahead of semaphore waits.
3610 */
3611bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3612 struct radeon_ring *ring,
3613 struct radeon_semaphore *semaphore,
3614 bool emit_wait)
3615{
3616 uint64_t addr = semaphore->gpu_addr;
3617 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3618
3619 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3620 radeon_ring_write(ring, lower_32_bits(addr));
3621 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3622
3623 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3624 /* Prevent the PFP from running ahead of the semaphore wait */
3625 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3626 radeon_ring_write(ring, 0x0);
3627 }
3628
3629 return true;
3630}
3631
3632/**
3633 * cik_copy_cpdma - copy pages using the CP DMA engine
3634 *
3635 * @rdev: radeon_device pointer
3636 * @src_offset: src GPU address
3637 * @dst_offset: dst GPU address
3638 * @num_gpu_pages: number of GPU pages to xfer
3639 * @resv: reservation object to sync to
3640 *
3641 * Copy GPU paging using the CP DMA engine (CIK+).
3642 * Used by the radeon ttm implementation to move pages if
3643 * registered as the asic copy callback.
3644 */
3645struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3646 uint64_t src_offset, uint64_t dst_offset,
3647 unsigned num_gpu_pages,
3648 struct dma_resv *resv)
3649{
3650 struct radeon_fence *fence;
3651 struct radeon_sync sync;
3652 int ring_index = rdev->asic->copy.blit_ring_index;
3653 struct radeon_ring *ring = &rdev->ring[ring_index];
3654 u32 size_in_bytes, cur_size_in_bytes, control;
3655 int i, num_loops;
3656 int r = 0;
3657
3658 radeon_sync_create(&sync);
3659
3660 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3661 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3662 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3663 if (r) {
3664 DRM_ERROR("radeon: moving bo (%d).\n", r);
3665 radeon_sync_free(rdev, &sync, NULL);
3666 return ERR_PTR(r);
3667 }
3668
3669 radeon_sync_resv(rdev, &sync, resv, false);
3670 radeon_sync_rings(rdev, &sync, ring->idx);
3671
3672 for (i = 0; i < num_loops; i++) {
3673 cur_size_in_bytes = size_in_bytes;
3674 if (cur_size_in_bytes > 0x1fffff)
3675 cur_size_in_bytes = 0x1fffff;
3676 size_in_bytes -= cur_size_in_bytes;
3677 control = 0;
3678 if (size_in_bytes == 0)
3679 control |= PACKET3_DMA_DATA_CP_SYNC;
3680 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3681 radeon_ring_write(ring, control);
3682 radeon_ring_write(ring, lower_32_bits(src_offset));
3683 radeon_ring_write(ring, upper_32_bits(src_offset));
3684 radeon_ring_write(ring, lower_32_bits(dst_offset));
3685 radeon_ring_write(ring, upper_32_bits(dst_offset));
3686 radeon_ring_write(ring, cur_size_in_bytes);
3687 src_offset += cur_size_in_bytes;
3688 dst_offset += cur_size_in_bytes;
3689 }
3690
3691 r = radeon_fence_emit(rdev, &fence, ring->idx);
3692 if (r) {
3693 radeon_ring_unlock_undo(rdev, ring);
3694 radeon_sync_free(rdev, &sync, NULL);
3695 return ERR_PTR(r);
3696 }
3697
3698 radeon_ring_unlock_commit(rdev, ring, false);
3699 radeon_sync_free(rdev, &sync, fence);
3700
3701 return fence;
3702}
3703
3704/*
3705 * IB stuff
3706 */
3707/**
3708 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3709 *
3710 * @rdev: radeon_device pointer
3711 * @ib: radeon indirect buffer object
3712 *
3713 * Emits a DE (drawing engine) or CE (constant engine) IB
3714 * on the gfx ring. IBs are usually generated by userspace
3715 * acceleration drivers and submitted to the kernel for
3716 * scheduling on the ring. This function schedules the IB
3717 * on the gfx ring for execution by the GPU.
3718 */
3719void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3720{
3721 struct radeon_ring *ring = &rdev->ring[ib->ring];
3722 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3723 u32 header, control = INDIRECT_BUFFER_VALID;
3724
3725 if (ib->is_const_ib) {
3726 /* set switch buffer packet before const IB */
3727 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3728 radeon_ring_write(ring, 0);
3729
3730 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3731 } else {
3732 u32 next_rptr;
3733 if (ring->rptr_save_reg) {
3734 next_rptr = ring->wptr + 3 + 4;
3735 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3736 radeon_ring_write(ring, ((ring->rptr_save_reg -
3737 PACKET3_SET_UCONFIG_REG_START) >> 2));
3738 radeon_ring_write(ring, next_rptr);
3739 } else if (rdev->wb.enabled) {
3740 next_rptr = ring->wptr + 5 + 4;
3741 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3742 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3743 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3744 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3745 radeon_ring_write(ring, next_rptr);
3746 }
3747
3748 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3749 }
3750
3751 control |= ib->length_dw | (vm_id << 24);
3752
3753 radeon_ring_write(ring, header);
3754 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3755 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3756 radeon_ring_write(ring, control);
3757}
3758
3759/**
3760 * cik_ib_test - basic gfx ring IB test
3761 *
3762 * @rdev: radeon_device pointer
3763 * @ring: radeon_ring structure holding ring information
3764 *
3765 * Allocate an IB and execute it on the gfx ring (CIK).
3766 * Provides a basic gfx ring test to verify that IBs are working.
3767 * Returns 0 on success, error on failure.
3768 */
3769int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3770{
3771 struct radeon_ib ib;
3772 uint32_t scratch;
3773 uint32_t tmp = 0;
3774 unsigned i;
3775 int r;
3776
3777 r = radeon_scratch_get(rdev, &scratch);
3778 if (r) {
3779 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3780 return r;
3781 }
3782 WREG32(scratch, 0xCAFEDEAD);
3783 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3784 if (r) {
3785 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3786 radeon_scratch_free(rdev, scratch);
3787 return r;
3788 }
3789 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3790 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3791 ib.ptr[2] = 0xDEADBEEF;
3792 ib.length_dw = 3;
3793 r = radeon_ib_schedule(rdev, &ib, NULL, false);
3794 if (r) {
3795 radeon_scratch_free(rdev, scratch);
3796 radeon_ib_free(rdev, &ib);
3797 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3798 return r;
3799 }
3800 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3801 RADEON_USEC_IB_TEST_TIMEOUT));
3802 if (r < 0) {
3803 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3804 radeon_scratch_free(rdev, scratch);
3805 radeon_ib_free(rdev, &ib);
3806 return r;
3807 } else if (r == 0) {
3808 DRM_ERROR("radeon: fence wait timed out.\n");
3809 radeon_scratch_free(rdev, scratch);
3810 radeon_ib_free(rdev, &ib);
3811 return -ETIMEDOUT;
3812 }
3813 r = 0;
3814 for (i = 0; i < rdev->usec_timeout; i++) {
3815 tmp = RREG32(scratch);
3816 if (tmp == 0xDEADBEEF)
3817 break;
3818 udelay(1);
3819 }
3820 if (i < rdev->usec_timeout) {
3821 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3822 } else {
3823 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3824 scratch, tmp);
3825 r = -EINVAL;
3826 }
3827 radeon_scratch_free(rdev, scratch);
3828 radeon_ib_free(rdev, &ib);
3829 return r;
3830}
3831
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3855/**
3856 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3857 *
3858 * @rdev: radeon_device pointer
3859 * @enable: enable or disable the MEs
3860 *
3861 * Halts or unhalts the gfx MEs.
3862 */
3863static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3864{
3865 if (enable)
3866 WREG32(CP_ME_CNTL, 0);
3867 else {
3868 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3869 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3870 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3871 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3872 }
3873 udelay(50);
3874}
3875
3876/**
3877 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3878 *
3879 * @rdev: radeon_device pointer
3880 *
3881 * Loads the gfx PFP, ME, and CE ucode.
3882 * Returns 0 for success, -EINVAL if the ucode is not available.
3883 */
3884static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3885{
3886 int i;
3887
3888 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3889 return -EINVAL;
3890
3891 cik_cp_gfx_enable(rdev, false);
3892
3893 if (rdev->new_fw) {
3894 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3895 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3896 const struct gfx_firmware_header_v1_0 *ce_hdr =
3897 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3898 const struct gfx_firmware_header_v1_0 *me_hdr =
3899 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3900 const __le32 *fw_data;
3901 u32 fw_size;
3902
3903 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3904 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3905 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3906
3907 /* PFP */
3908 fw_data = (const __le32 *)
3909 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3910 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3911 WREG32(CP_PFP_UCODE_ADDR, 0);
3912 for (i = 0; i < fw_size; i++)
3913 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3914 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3915
3916 /* CE */
3917 fw_data = (const __le32 *)
3918 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3919 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3920 WREG32(CP_CE_UCODE_ADDR, 0);
3921 for (i = 0; i < fw_size; i++)
3922 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3923 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3924
3925 /* ME */
3926 fw_data = (const __be32 *)
3927 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3928 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3929 WREG32(CP_ME_RAM_WADDR, 0);
3930 for (i = 0; i < fw_size; i++)
3931 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3932 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3933 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3934 } else {
3935 const __be32 *fw_data;
3936
3937 /* PFP */
3938 fw_data = (const __be32 *)rdev->pfp_fw->data;
3939 WREG32(CP_PFP_UCODE_ADDR, 0);
3940 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3941 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3942 WREG32(CP_PFP_UCODE_ADDR, 0);
3943
3944 /* CE */
3945 fw_data = (const __be32 *)rdev->ce_fw->data;
3946 WREG32(CP_CE_UCODE_ADDR, 0);
3947 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3948 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3949 WREG32(CP_CE_UCODE_ADDR, 0);
3950
3951 /* ME */
3952 fw_data = (const __be32 *)rdev->me_fw->data;
3953 WREG32(CP_ME_RAM_WADDR, 0);
3954 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3955 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3956 WREG32(CP_ME_RAM_WADDR, 0);
3957 }
3958
3959 return 0;
3960}
3961
3962/**
3963 * cik_cp_gfx_start - start the gfx ring
3964 *
3965 * @rdev: radeon_device pointer
3966 *
3967 * Enables the ring and loads the clear state context and other
3968 * packets required to init the ring.
3969 * Returns 0 for success, error for failure.
3970 */
3971static int cik_cp_gfx_start(struct radeon_device *rdev)
3972{
3973 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3974 int r, i;
3975
3976 /* init the CP */
3977 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3978 WREG32(CP_ENDIAN_SWAP, 0);
3979 WREG32(CP_DEVICE_ID, 1);
3980
3981 cik_cp_gfx_enable(rdev, true);
3982
3983 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3984 if (r) {
3985 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3986 return r;
3987 }
3988
3989 /* init the CE partitions. CE only used for gfx on CIK */
3990 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3991 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3992 radeon_ring_write(ring, 0x8000);
3993 radeon_ring_write(ring, 0x8000);
3994
3995 /* setup clear context state */
3996 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3997 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3998
3999 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4000 radeon_ring_write(ring, 0x80000000);
4001 radeon_ring_write(ring, 0x80000000);
4002
4003 for (i = 0; i < cik_default_size; i++)
4004 radeon_ring_write(ring, cik_default_state[i]);
4005
4006 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4007 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4008
4009 /* set clear context state */
4010 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4011 radeon_ring_write(ring, 0);
4012
4013 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4014 radeon_ring_write(ring, 0x00000316);
4015 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4016 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4017
4018 radeon_ring_unlock_commit(rdev, ring, false);
4019
4020 return 0;
4021}
4022
4023/**
4024 * cik_cp_gfx_fini - stop the gfx ring
4025 *
4026 * @rdev: radeon_device pointer
4027 *
4028 * Stop the gfx ring and tear down the driver ring
4029 * info.
4030 */
4031static void cik_cp_gfx_fini(struct radeon_device *rdev)
4032{
4033 cik_cp_gfx_enable(rdev, false);
4034 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4035}
4036
4037/**
4038 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Program the location and size of the gfx ring buffer
4043 * and test it to make sure it's working.
4044 * Returns 0 for success, error for failure.
4045 */
4046static int cik_cp_gfx_resume(struct radeon_device *rdev)
4047{
4048 struct radeon_ring *ring;
4049 u32 tmp;
4050 u32 rb_bufsz;
4051 u64 rb_addr;
4052 int r;
4053
4054 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4055 if (rdev->family != CHIP_HAWAII)
4056 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4057
4058 /* Set the write pointer delay */
4059 WREG32(CP_RB_WPTR_DELAY, 0);
4060
4061 /* set the RB to use vmid 0 */
4062 WREG32(CP_RB_VMID, 0);
4063
4064 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4065
4066 /* ring 0 - compute and gfx */
4067 /* Set ring buffer size */
4068 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4069 rb_bufsz = order_base_2(ring->ring_size / 8);
4070 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4071#ifdef __BIG_ENDIAN
4072 tmp |= BUF_SWAP_32BIT;
4073#endif
4074 WREG32(CP_RB0_CNTL, tmp);
4075
4076 /* Initialize the ring buffer's read and write pointers */
4077 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4078 ring->wptr = 0;
4079 WREG32(CP_RB0_WPTR, ring->wptr);
4080
4081 /* set the wb address wether it's enabled or not */
4082 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4083 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4084
4085 /* scratch register shadowing is no longer supported */
4086 WREG32(SCRATCH_UMSK, 0);
4087
4088 if (!rdev->wb.enabled)
4089 tmp |= RB_NO_UPDATE;
4090
4091 mdelay(1);
4092 WREG32(CP_RB0_CNTL, tmp);
4093
4094 rb_addr = ring->gpu_addr >> 8;
4095 WREG32(CP_RB0_BASE, rb_addr);
4096 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4097
4098 /* start the ring */
4099 cik_cp_gfx_start(rdev);
4100 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4101 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4102 if (r) {
4103 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4104 return r;
4105 }
4106
4107 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4108 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4109
4110 return 0;
4111}
4112
4113u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4114 struct radeon_ring *ring)
4115{
4116 u32 rptr;
4117
4118 if (rdev->wb.enabled)
4119 rptr = rdev->wb.wb[ring->rptr_offs/4];
4120 else
4121 rptr = RREG32(CP_RB0_RPTR);
4122
4123 return rptr;
4124}
4125
4126u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4127 struct radeon_ring *ring)
4128{
4129 return RREG32(CP_RB0_WPTR);
4130}
4131
4132void cik_gfx_set_wptr(struct radeon_device *rdev,
4133 struct radeon_ring *ring)
4134{
4135 WREG32(CP_RB0_WPTR, ring->wptr);
4136 (void)RREG32(CP_RB0_WPTR);
4137}
4138
4139u32 cik_compute_get_rptr(struct radeon_device *rdev,
4140 struct radeon_ring *ring)
4141{
4142 u32 rptr;
4143
4144 if (rdev->wb.enabled) {
4145 rptr = rdev->wb.wb[ring->rptr_offs/4];
4146 } else {
4147 mutex_lock(&rdev->srbm_mutex);
4148 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4149 rptr = RREG32(CP_HQD_PQ_RPTR);
4150 cik_srbm_select(rdev, 0, 0, 0, 0);
4151 mutex_unlock(&rdev->srbm_mutex);
4152 }
4153
4154 return rptr;
4155}
4156
4157u32 cik_compute_get_wptr(struct radeon_device *rdev,
4158 struct radeon_ring *ring)
4159{
4160 u32 wptr;
4161
4162 if (rdev->wb.enabled) {
4163 /* XXX check if swapping is necessary on BE */
4164 wptr = rdev->wb.wb[ring->wptr_offs/4];
4165 } else {
4166 mutex_lock(&rdev->srbm_mutex);
4167 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4168 wptr = RREG32(CP_HQD_PQ_WPTR);
4169 cik_srbm_select(rdev, 0, 0, 0, 0);
4170 mutex_unlock(&rdev->srbm_mutex);
4171 }
4172
4173 return wptr;
4174}
4175
4176void cik_compute_set_wptr(struct radeon_device *rdev,
4177 struct radeon_ring *ring)
4178{
4179 /* XXX check if swapping is necessary on BE */
4180 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4181 WDOORBELL32(ring->doorbell_index, ring->wptr);
4182}
4183
4184static void cik_compute_stop(struct radeon_device *rdev,
4185 struct radeon_ring *ring)
4186{
4187 u32 j, tmp;
4188
4189 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4190 /* Disable wptr polling. */
4191 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4192 tmp &= ~WPTR_POLL_EN;
4193 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4194 /* Disable HQD. */
4195 if (RREG32(CP_HQD_ACTIVE) & 1) {
4196 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4197 for (j = 0; j < rdev->usec_timeout; j++) {
4198 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4199 break;
4200 udelay(1);
4201 }
4202 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4203 WREG32(CP_HQD_PQ_RPTR, 0);
4204 WREG32(CP_HQD_PQ_WPTR, 0);
4205 }
4206 cik_srbm_select(rdev, 0, 0, 0, 0);
4207}
4208
4209/**
4210 * cik_cp_compute_enable - enable/disable the compute CP MEs
4211 *
4212 * @rdev: radeon_device pointer
4213 * @enable: enable or disable the MEs
4214 *
4215 * Halts or unhalts the compute MEs.
4216 */
4217static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4218{
4219 if (enable)
4220 WREG32(CP_MEC_CNTL, 0);
4221 else {
4222 /*
4223 * To make hibernation reliable we need to clear compute ring
4224 * configuration before halting the compute ring.
4225 */
4226 mutex_lock(&rdev->srbm_mutex);
4227 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4228 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4229 mutex_unlock(&rdev->srbm_mutex);
4230
4231 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4232 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4233 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4234 }
4235 udelay(50);
4236}
4237
4238/**
4239 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4240 *
4241 * @rdev: radeon_device pointer
4242 *
4243 * Loads the compute MEC1&2 ucode.
4244 * Returns 0 for success, -EINVAL if the ucode is not available.
4245 */
4246static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4247{
4248 int i;
4249
4250 if (!rdev->mec_fw)
4251 return -EINVAL;
4252
4253 cik_cp_compute_enable(rdev, false);
4254
4255 if (rdev->new_fw) {
4256 const struct gfx_firmware_header_v1_0 *mec_hdr =
4257 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4258 const __le32 *fw_data;
4259 u32 fw_size;
4260
4261 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4262
4263 /* MEC1 */
4264 fw_data = (const __le32 *)
4265 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4266 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4267 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4268 for (i = 0; i < fw_size; i++)
4269 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4270 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4271
4272 /* MEC2 */
4273 if (rdev->family == CHIP_KAVERI) {
4274 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4275 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4276
4277 fw_data = (const __le32 *)
4278 (rdev->mec2_fw->data +
4279 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4280 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4281 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4282 for (i = 0; i < fw_size; i++)
4283 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4284 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4285 }
4286 } else {
4287 const __be32 *fw_data;
4288
4289 /* MEC1 */
4290 fw_data = (const __be32 *)rdev->mec_fw->data;
4291 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4292 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4293 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4294 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4295
4296 if (rdev->family == CHIP_KAVERI) {
4297 /* MEC2 */
4298 fw_data = (const __be32 *)rdev->mec_fw->data;
4299 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4300 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4301 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4302 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4303 }
4304 }
4305
4306 return 0;
4307}
4308
4309/**
4310 * cik_cp_compute_start - start the compute queues
4311 *
4312 * @rdev: radeon_device pointer
4313 *
4314 * Enable the compute queues.
4315 * Returns 0 for success, error for failure.
4316 */
4317static int cik_cp_compute_start(struct radeon_device *rdev)
4318{
4319 cik_cp_compute_enable(rdev, true);
4320
4321 return 0;
4322}
4323
4324/**
4325 * cik_cp_compute_fini - stop the compute queues
4326 *
4327 * @rdev: radeon_device pointer
4328 *
4329 * Stop the compute queues and tear down the driver queue
4330 * info.
4331 */
4332static void cik_cp_compute_fini(struct radeon_device *rdev)
4333{
4334 int i, idx, r;
4335
4336 cik_cp_compute_enable(rdev, false);
4337
4338 for (i = 0; i < 2; i++) {
4339 if (i == 0)
4340 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4341 else
4342 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4343
4344 if (rdev->ring[idx].mqd_obj) {
4345 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4346 if (unlikely(r != 0))
4347 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4348
4349 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4350 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4351
4352 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4353 rdev->ring[idx].mqd_obj = NULL;
4354 }
4355 }
4356}
4357
4358static void cik_mec_fini(struct radeon_device *rdev)
4359{
4360 int r;
4361
4362 if (rdev->mec.hpd_eop_obj) {
4363 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4364 if (unlikely(r != 0))
4365 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4366 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4367 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4368
4369 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4370 rdev->mec.hpd_eop_obj = NULL;
4371 }
4372}
4373
4374#define MEC_HPD_SIZE 2048
4375
4376static int cik_mec_init(struct radeon_device *rdev)
4377{
4378 int r;
4379 u32 *hpd;
4380
4381 /*
4382 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4383 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4384 */
4385 if (rdev->family == CHIP_KAVERI)
4386 rdev->mec.num_mec = 2;
4387 else
4388 rdev->mec.num_mec = 1;
4389 rdev->mec.num_pipe = 4;
4390 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4391
4392 if (rdev->mec.hpd_eop_obj == NULL) {
4393 r = radeon_bo_create(rdev,
4394 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4395 PAGE_SIZE, true,
4396 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4397 &rdev->mec.hpd_eop_obj);
4398 if (r) {
4399 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4400 return r;
4401 }
4402 }
4403
4404 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4405 if (unlikely(r != 0)) {
4406 cik_mec_fini(rdev);
4407 return r;
4408 }
4409 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4410 &rdev->mec.hpd_eop_gpu_addr);
4411 if (r) {
4412 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4413 cik_mec_fini(rdev);
4414 return r;
4415 }
4416 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4417 if (r) {
4418 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4419 cik_mec_fini(rdev);
4420 return r;
4421 }
4422
4423 /* clear memory. Not sure if this is required or not */
4424 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4425
4426 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4427 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4428
4429 return 0;
4430}
4431
4432struct hqd_registers
4433{
4434 u32 cp_mqd_base_addr;
4435 u32 cp_mqd_base_addr_hi;
4436 u32 cp_hqd_active;
4437 u32 cp_hqd_vmid;
4438 u32 cp_hqd_persistent_state;
4439 u32 cp_hqd_pipe_priority;
4440 u32 cp_hqd_queue_priority;
4441 u32 cp_hqd_quantum;
4442 u32 cp_hqd_pq_base;
4443 u32 cp_hqd_pq_base_hi;
4444 u32 cp_hqd_pq_rptr;
4445 u32 cp_hqd_pq_rptr_report_addr;
4446 u32 cp_hqd_pq_rptr_report_addr_hi;
4447 u32 cp_hqd_pq_wptr_poll_addr;
4448 u32 cp_hqd_pq_wptr_poll_addr_hi;
4449 u32 cp_hqd_pq_doorbell_control;
4450 u32 cp_hqd_pq_wptr;
4451 u32 cp_hqd_pq_control;
4452 u32 cp_hqd_ib_base_addr;
4453 u32 cp_hqd_ib_base_addr_hi;
4454 u32 cp_hqd_ib_rptr;
4455 u32 cp_hqd_ib_control;
4456 u32 cp_hqd_iq_timer;
4457 u32 cp_hqd_iq_rptr;
4458 u32 cp_hqd_dequeue_request;
4459 u32 cp_hqd_dma_offload;
4460 u32 cp_hqd_sema_cmd;
4461 u32 cp_hqd_msg_type;
4462 u32 cp_hqd_atomic0_preop_lo;
4463 u32 cp_hqd_atomic0_preop_hi;
4464 u32 cp_hqd_atomic1_preop_lo;
4465 u32 cp_hqd_atomic1_preop_hi;
4466 u32 cp_hqd_hq_scheduler0;
4467 u32 cp_hqd_hq_scheduler1;
4468 u32 cp_mqd_control;
4469};
4470
4471struct bonaire_mqd
4472{
4473 u32 header;
4474 u32 dispatch_initiator;
4475 u32 dimensions[3];
4476 u32 start_idx[3];
4477 u32 num_threads[3];
4478 u32 pipeline_stat_enable;
4479 u32 perf_counter_enable;
4480 u32 pgm[2];
4481 u32 tba[2];
4482 u32 tma[2];
4483 u32 pgm_rsrc[2];
4484 u32 vmid;
4485 u32 resource_limits;
4486 u32 static_thread_mgmt01[2];
4487 u32 tmp_ring_size;
4488 u32 static_thread_mgmt23[2];
4489 u32 restart[3];
4490 u32 thread_trace_enable;
4491 u32 reserved1;
4492 u32 user_data[16];
4493 u32 vgtcs_invoke_count[2];
4494 struct hqd_registers queue_state;
4495 u32 dequeue_cntr;
4496 u32 interrupt_queue[64];
4497};
4498
4499/**
4500 * cik_cp_compute_resume - setup the compute queue registers
4501 *
4502 * @rdev: radeon_device pointer
4503 *
4504 * Program the compute queues and test them to make sure they
4505 * are working.
4506 * Returns 0 for success, error for failure.
4507 */
4508static int cik_cp_compute_resume(struct radeon_device *rdev)
4509{
4510 int r, i, j, idx;
4511 u32 tmp;
4512 bool use_doorbell = true;
4513 u64 hqd_gpu_addr;
4514 u64 mqd_gpu_addr;
4515 u64 eop_gpu_addr;
4516 u64 wb_gpu_addr;
4517 u32 *buf;
4518 struct bonaire_mqd *mqd;
4519
4520 r = cik_cp_compute_start(rdev);
4521 if (r)
4522 return r;
4523
4524 /* fix up chicken bits */
4525 tmp = RREG32(CP_CPF_DEBUG);
4526 tmp |= (1 << 23);
4527 WREG32(CP_CPF_DEBUG, tmp);
4528
4529 /* init the pipes */
4530 mutex_lock(&rdev->srbm_mutex);
4531
4532 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4533 int me = (i < 4) ? 1 : 2;
4534 int pipe = (i < 4) ? i : (i - 4);
4535
4536 cik_srbm_select(rdev, me, pipe, 0, 0);
4537
4538 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4539 /* write the EOP addr */
4540 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4541 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4542
4543 /* set the VMID assigned */
4544 WREG32(CP_HPD_EOP_VMID, 0);
4545
4546 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4547 tmp = RREG32(CP_HPD_EOP_CONTROL);
4548 tmp &= ~EOP_SIZE_MASK;
4549 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4550 WREG32(CP_HPD_EOP_CONTROL, tmp);
4551
4552 }
4553 cik_srbm_select(rdev, 0, 0, 0, 0);
4554 mutex_unlock(&rdev->srbm_mutex);
4555
4556 /* init the queues. Just two for now. */
4557 for (i = 0; i < 2; i++) {
4558 if (i == 0)
4559 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4560 else
4561 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4562
4563 if (rdev->ring[idx].mqd_obj == NULL) {
4564 r = radeon_bo_create(rdev,
4565 sizeof(struct bonaire_mqd),
4566 PAGE_SIZE, true,
4567 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4568 NULL, &rdev->ring[idx].mqd_obj);
4569 if (r) {
4570 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4571 return r;
4572 }
4573 }
4574
4575 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4576 if (unlikely(r != 0)) {
4577 cik_cp_compute_fini(rdev);
4578 return r;
4579 }
4580 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4581 &mqd_gpu_addr);
4582 if (r) {
4583 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4584 cik_cp_compute_fini(rdev);
4585 return r;
4586 }
4587 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4588 if (r) {
4589 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4590 cik_cp_compute_fini(rdev);
4591 return r;
4592 }
4593
4594 /* init the mqd struct */
4595 memset(buf, 0, sizeof(struct bonaire_mqd));
4596
4597 mqd = (struct bonaire_mqd *)buf;
4598 mqd->header = 0xC0310800;
4599 mqd->static_thread_mgmt01[0] = 0xffffffff;
4600 mqd->static_thread_mgmt01[1] = 0xffffffff;
4601 mqd->static_thread_mgmt23[0] = 0xffffffff;
4602 mqd->static_thread_mgmt23[1] = 0xffffffff;
4603
4604 mutex_lock(&rdev->srbm_mutex);
4605 cik_srbm_select(rdev, rdev->ring[idx].me,
4606 rdev->ring[idx].pipe,
4607 rdev->ring[idx].queue, 0);
4608
4609 /* disable wptr polling */
4610 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611 tmp &= ~WPTR_POLL_EN;
4612 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613
4614 /* enable doorbell? */
4615 mqd->queue_state.cp_hqd_pq_doorbell_control =
4616 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4617 if (use_doorbell)
4618 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4619 else
4620 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4621 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4622 mqd->queue_state.cp_hqd_pq_doorbell_control);
4623
4624 /* disable the queue if it's active */
4625 mqd->queue_state.cp_hqd_dequeue_request = 0;
4626 mqd->queue_state.cp_hqd_pq_rptr = 0;
4627 mqd->queue_state.cp_hqd_pq_wptr= 0;
4628 if (RREG32(CP_HQD_ACTIVE) & 1) {
4629 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4630 for (j = 0; j < rdev->usec_timeout; j++) {
4631 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4632 break;
4633 udelay(1);
4634 }
4635 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4636 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4637 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4638 }
4639
4640 /* set the pointer to the MQD */
4641 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4642 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4643 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4644 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4645 /* set MQD vmid to 0 */
4646 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4647 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4648 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4649
4650 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4651 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4652 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4653 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4654 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4655 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4656
4657 /* set up the HQD, this is similar to CP_RB0_CNTL */
4658 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4659 mqd->queue_state.cp_hqd_pq_control &=
4660 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4661
4662 mqd->queue_state.cp_hqd_pq_control |=
4663 order_base_2(rdev->ring[idx].ring_size / 8);
4664 mqd->queue_state.cp_hqd_pq_control |=
4665 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4666#ifdef __BIG_ENDIAN
4667 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4668#endif
4669 mqd->queue_state.cp_hqd_pq_control &=
4670 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4671 mqd->queue_state.cp_hqd_pq_control |=
4672 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4673 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4674
4675 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4676 if (i == 0)
4677 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4678 else
4679 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4680 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4681 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4682 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4683 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4684 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4685
4686 /* set the wb address wether it's enabled or not */
4687 if (i == 0)
4688 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4689 else
4690 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4691 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4692 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4693 upper_32_bits(wb_gpu_addr) & 0xffff;
4694 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4695 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4696 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4697 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4698
4699 /* enable the doorbell if requested */
4700 if (use_doorbell) {
4701 mqd->queue_state.cp_hqd_pq_doorbell_control =
4702 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4703 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4704 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4705 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4706 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4707 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4708 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4709
4710 } else {
4711 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4712 }
4713 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4714 mqd->queue_state.cp_hqd_pq_doorbell_control);
4715
4716 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4717 rdev->ring[idx].wptr = 0;
4718 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4719 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4720 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4721
4722 /* set the vmid for the queue */
4723 mqd->queue_state.cp_hqd_vmid = 0;
4724 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4725
4726 /* activate the queue */
4727 mqd->queue_state.cp_hqd_active = 1;
4728 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4729
4730 cik_srbm_select(rdev, 0, 0, 0, 0);
4731 mutex_unlock(&rdev->srbm_mutex);
4732
4733 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4734 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4735
4736 rdev->ring[idx].ready = true;
4737 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4738 if (r)
4739 rdev->ring[idx].ready = false;
4740 }
4741
4742 return 0;
4743}
4744
4745static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4746{
4747 cik_cp_gfx_enable(rdev, enable);
4748 cik_cp_compute_enable(rdev, enable);
4749}
4750
/*
 * Load the gfx ucode and then the compute ucode.
 * Returns 0 for success or the first error encountered; the compute
 * ucode is not loaded if the gfx load fails.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4764
/* Tear down both command processors: gfx ring first, then the compute queues. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	struct radeon_device *dev = rdev;

	cik_cp_gfx_fini(dev);
	cik_cp_compute_fini(dev);
}
4770
4771static int cik_cp_resume(struct radeon_device *rdev)
4772{
4773 int r;
4774
4775 cik_enable_gui_idle_interrupt(rdev, false);
4776
4777 r = cik_cp_load_microcode(rdev);
4778 if (r)
4779 return r;
4780
4781 r = cik_cp_gfx_resume(rdev);
4782 if (r)
4783 return r;
4784 r = cik_cp_compute_resume(rdev);
4785 if (r)
4786 return r;
4787
4788 cik_enable_gui_idle_interrupt(rdev, true);
4789
4790 return 0;
4791}
4792
4793static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4794{
4795 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4796 RREG32(GRBM_STATUS));
4797 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4798 RREG32(GRBM_STATUS2));
4799 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4800 RREG32(GRBM_STATUS_SE0));
4801 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4802 RREG32(GRBM_STATUS_SE1));
4803 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4804 RREG32(GRBM_STATUS_SE2));
4805 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4806 RREG32(GRBM_STATUS_SE3));
4807 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4808 RREG32(SRBM_STATUS));
4809 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4810 RREG32(SRBM_STATUS2));
4811 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4812 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4813 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4814 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4815 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4816 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4817 RREG32(CP_STALLED_STAT1));
4818 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4819 RREG32(CP_STALLED_STAT2));
4820 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4821 RREG32(CP_STALLED_STAT3));
4822 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4823 RREG32(CP_CPF_BUSY_STAT));
4824 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4825 RREG32(CP_CPF_STALLED_STAT1));
4826 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4827 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4828 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4829 RREG32(CP_CPC_STALLED_STAT1));
4830 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4831}
4832
4833/**
4834 * cik_gpu_check_soft_reset - check which blocks are busy
4835 *
4836 * @rdev: radeon_device pointer
4837 *
4838 * Check which blocks are busy and return the relevant reset
4839 * mask to be used by cik_gpu_soft_reset().
4840 * Returns a mask of the blocks to be reset.
4841 */
4842u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4843{
4844 u32 reset_mask = 0;
4845 u32 tmp;
4846
4847 /* GRBM_STATUS */
4848 tmp = RREG32(GRBM_STATUS);
4849 if (tmp & (PA_BUSY | SC_BUSY |
4850 BCI_BUSY | SX_BUSY |
4851 TA_BUSY | VGT_BUSY |
4852 DB_BUSY | CB_BUSY |
4853 GDS_BUSY | SPI_BUSY |
4854 IA_BUSY | IA_BUSY_NO_DMA))
4855 reset_mask |= RADEON_RESET_GFX;
4856
4857 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4858 reset_mask |= RADEON_RESET_CP;
4859
4860 /* GRBM_STATUS2 */
4861 tmp = RREG32(GRBM_STATUS2);
4862 if (tmp & RLC_BUSY)
4863 reset_mask |= RADEON_RESET_RLC;
4864
4865 /* SDMA0_STATUS_REG */
4866 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4867 if (!(tmp & SDMA_IDLE))
4868 reset_mask |= RADEON_RESET_DMA;
4869
4870 /* SDMA1_STATUS_REG */
4871 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4872 if (!(tmp & SDMA_IDLE))
4873 reset_mask |= RADEON_RESET_DMA1;
4874
4875 /* SRBM_STATUS2 */
4876 tmp = RREG32(SRBM_STATUS2);
4877 if (tmp & SDMA_BUSY)
4878 reset_mask |= RADEON_RESET_DMA;
4879
4880 if (tmp & SDMA1_BUSY)
4881 reset_mask |= RADEON_RESET_DMA1;
4882
4883 /* SRBM_STATUS */
4884 tmp = RREG32(SRBM_STATUS);
4885
4886 if (tmp & IH_BUSY)
4887 reset_mask |= RADEON_RESET_IH;
4888
4889 if (tmp & SEM_BUSY)
4890 reset_mask |= RADEON_RESET_SEM;
4891
4892 if (tmp & GRBM_RQ_PENDING)
4893 reset_mask |= RADEON_RESET_GRBM;
4894
4895 if (tmp & VMC_BUSY)
4896 reset_mask |= RADEON_RESET_VMC;
4897
4898 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4899 MCC_BUSY | MCD_BUSY))
4900 reset_mask |= RADEON_RESET_MC;
4901
4902 if (evergreen_is_display_hung(rdev))
4903 reset_mask |= RADEON_RESET_DISPLAY;
4904
4905 /* Skip MC reset as it's mostly likely not hung, just busy */
4906 if (reset_mask & RADEON_RESET_MC) {
4907 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4908 reset_mask &= ~RADEON_RESET_MC;
4909 }
4910
4911 return reset_mask;
4912}
4913
4914/**
4915 * cik_gpu_soft_reset - soft reset GPU
4916 *
4917 * @rdev: radeon_device pointer
4918 * @reset_mask: mask of which blocks to reset
4919 *
4920 * Soft reset the blocks specified in @reset_mask.
4921 */
4922static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4923{
4924 struct evergreen_mc_save save;
4925 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4926 u32 tmp;
4927
4928 if (reset_mask == 0)
4929 return;
4930
4931 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4932
4933 cik_print_gpu_status_regs(rdev);
4934 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4935 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4936 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4937 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4938
4939 /* disable CG/PG */
4940 cik_fini_pg(rdev);
4941 cik_fini_cg(rdev);
4942
4943 /* stop the rlc */
4944 cik_rlc_stop(rdev);
4945
4946 /* Disable GFX parsing/prefetching */
4947 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4948
4949 /* Disable MEC parsing/prefetching */
4950 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4951
4952 if (reset_mask & RADEON_RESET_DMA) {
4953 /* sdma0 */
4954 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4955 tmp |= SDMA_HALT;
4956 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4957 }
4958 if (reset_mask & RADEON_RESET_DMA1) {
4959 /* sdma1 */
4960 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4961 tmp |= SDMA_HALT;
4962 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4963 }
4964
4965 evergreen_mc_stop(rdev, &save);
4966 if (evergreen_mc_wait_for_idle(rdev)) {
4967 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4968 }
4969
4970 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4971 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4972
4973 if (reset_mask & RADEON_RESET_CP) {
4974 grbm_soft_reset |= SOFT_RESET_CP;
4975
4976 srbm_soft_reset |= SOFT_RESET_GRBM;
4977 }
4978
4979 if (reset_mask & RADEON_RESET_DMA)
4980 srbm_soft_reset |= SOFT_RESET_SDMA;
4981
4982 if (reset_mask & RADEON_RESET_DMA1)
4983 srbm_soft_reset |= SOFT_RESET_SDMA1;
4984
4985 if (reset_mask & RADEON_RESET_DISPLAY)
4986 srbm_soft_reset |= SOFT_RESET_DC;
4987
4988 if (reset_mask & RADEON_RESET_RLC)
4989 grbm_soft_reset |= SOFT_RESET_RLC;
4990
4991 if (reset_mask & RADEON_RESET_SEM)
4992 srbm_soft_reset |= SOFT_RESET_SEM;
4993
4994 if (reset_mask & RADEON_RESET_IH)
4995 srbm_soft_reset |= SOFT_RESET_IH;
4996
4997 if (reset_mask & RADEON_RESET_GRBM)
4998 srbm_soft_reset |= SOFT_RESET_GRBM;
4999
5000 if (reset_mask & RADEON_RESET_VMC)
5001 srbm_soft_reset |= SOFT_RESET_VMC;
5002
5003 if (!(rdev->flags & RADEON_IS_IGP)) {
5004 if (reset_mask & RADEON_RESET_MC)
5005 srbm_soft_reset |= SOFT_RESET_MC;
5006 }
5007
5008 if (grbm_soft_reset) {
5009 tmp = RREG32(GRBM_SOFT_RESET);
5010 tmp |= grbm_soft_reset;
5011 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5012 WREG32(GRBM_SOFT_RESET, tmp);
5013 tmp = RREG32(GRBM_SOFT_RESET);
5014
5015 udelay(50);
5016
5017 tmp &= ~grbm_soft_reset;
5018 WREG32(GRBM_SOFT_RESET, tmp);
5019 tmp = RREG32(GRBM_SOFT_RESET);
5020 }
5021
5022 if (srbm_soft_reset) {
5023 tmp = RREG32(SRBM_SOFT_RESET);
5024 tmp |= srbm_soft_reset;
5025 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5026 WREG32(SRBM_SOFT_RESET, tmp);
5027 tmp = RREG32(SRBM_SOFT_RESET);
5028
5029 udelay(50);
5030
5031 tmp &= ~srbm_soft_reset;
5032 WREG32(SRBM_SOFT_RESET, tmp);
5033 tmp = RREG32(SRBM_SOFT_RESET);
5034 }
5035
5036 /* Wait a little for things to settle down */
5037 udelay(50);
5038
5039 evergreen_mc_resume(rdev, &save);
5040 udelay(50);
5041
5042 cik_print_gpu_status_regs(rdev);
5043}
5044
5045struct kv_reset_save_regs {
5046 u32 gmcon_reng_execute;
5047 u32 gmcon_misc;
5048 u32 gmcon_misc3;
5049};
5050
5051static void kv_save_regs_for_reset(struct radeon_device *rdev,
5052 struct kv_reset_save_regs *save)
5053{
5054 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5055 save->gmcon_misc = RREG32(GMCON_MISC);
5056 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5057
5058 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5059 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5060 STCTRL_STUTTER_EN));
5061}
5062
5063static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5064 struct kv_reset_save_regs *save)
5065{
5066 int i;
5067
5068 WREG32(GMCON_PGFSM_WRITE, 0);
5069 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5070
5071 for (i = 0; i < 5; i++)
5072 WREG32(GMCON_PGFSM_WRITE, 0);
5073
5074 WREG32(GMCON_PGFSM_WRITE, 0);
5075 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5076
5077 for (i = 0; i < 5; i++)
5078 WREG32(GMCON_PGFSM_WRITE, 0);
5079
5080 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5081 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5082
5083 for (i = 0; i < 5; i++)
5084 WREG32(GMCON_PGFSM_WRITE, 0);
5085
5086 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5087 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5088
5089 for (i = 0; i < 5; i++)
5090 WREG32(GMCON_PGFSM_WRITE, 0);
5091
5092 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5093 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5094
5095 for (i = 0; i < 5; i++)
5096 WREG32(GMCON_PGFSM_WRITE, 0);
5097
5098 WREG32(GMCON_PGFSM_WRITE, 0);
5099 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5100
5101 for (i = 0; i < 5; i++)
5102 WREG32(GMCON_PGFSM_WRITE, 0);
5103
5104 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5105 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5106
5107 for (i = 0; i < 5; i++)
5108 WREG32(GMCON_PGFSM_WRITE, 0);
5109
5110 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5111 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5112
5113 for (i = 0; i < 5; i++)
5114 WREG32(GMCON_PGFSM_WRITE, 0);
5115
5116 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5117 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5118
5119 for (i = 0; i < 5; i++)
5120 WREG32(GMCON_PGFSM_WRITE, 0);
5121
5122 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5123 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5124
5125 for (i = 0; i < 5; i++)
5126 WREG32(GMCON_PGFSM_WRITE, 0);
5127
5128 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5129 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5130
5131 WREG32(GMCON_MISC3, save->gmcon_misc3);
5132 WREG32(GMCON_MISC, save->gmcon_misc);
5133 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5134}
5135
5136static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5137{
5138 struct evergreen_mc_save save;
5139 struct kv_reset_save_regs kv_save = { 0 };
5140 u32 tmp, i;
5141
5142 dev_info(rdev->dev, "GPU pci config reset\n");
5143
5144 /* disable dpm? */
5145
5146 /* disable cg/pg */
5147 cik_fini_pg(rdev);
5148 cik_fini_cg(rdev);
5149
5150 /* Disable GFX parsing/prefetching */
5151 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5152
5153 /* Disable MEC parsing/prefetching */
5154 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5155
5156 /* sdma0 */
5157 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5158 tmp |= SDMA_HALT;
5159 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5160 /* sdma1 */
5161 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5162 tmp |= SDMA_HALT;
5163 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5164 /* XXX other engines? */
5165
5166 /* halt the rlc, disable cp internal ints */
5167 cik_rlc_stop(rdev);
5168
5169 udelay(50);
5170
5171 /* disable mem access */
5172 evergreen_mc_stop(rdev, &save);
5173 if (evergreen_mc_wait_for_idle(rdev)) {
5174 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5175 }
5176
5177 if (rdev->flags & RADEON_IS_IGP)
5178 kv_save_regs_for_reset(rdev, &kv_save);
5179
5180 /* disable BM */
5181 pci_clear_master(rdev->pdev);
5182 /* reset */
5183 radeon_pci_config_reset(rdev);
5184
5185 udelay(100);
5186
5187 /* wait for asic to come out of reset */
5188 for (i = 0; i < rdev->usec_timeout; i++) {
5189 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5190 break;
5191 udelay(1);
5192 }
5193
5194 /* does asic init need to be run first??? */
5195 if (rdev->flags & RADEON_IS_IGP)
5196 kv_restore_regs_for_reset(rdev, &kv_save);
5197}
5198
5199/**
5200 * cik_asic_reset - soft reset GPU
5201 *
5202 * @rdev: radeon_device pointer
5203 * @hard: force hard reset
5204 *
5205 * Look up which blocks are hung and attempt
5206 * to reset them.
5207 * Returns 0 for success.
5208 */
5209int cik_asic_reset(struct radeon_device *rdev, bool hard)
5210{
5211 u32 reset_mask;
5212
5213 if (hard) {
5214 cik_gpu_pci_config_reset(rdev);
5215 return 0;
5216 }
5217
5218 reset_mask = cik_gpu_check_soft_reset(rdev);
5219
5220 if (reset_mask)
5221 r600_set_bios_scratch_engine_hung(rdev, true);
5222
5223 /* try soft reset */
5224 cik_gpu_soft_reset(rdev, reset_mask);
5225
5226 reset_mask = cik_gpu_check_soft_reset(rdev);
5227
5228 /* try pci config reset */
5229 if (reset_mask && radeon_hard_reset)
5230 cik_gpu_pci_config_reset(rdev);
5231
5232 reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234 if (!reset_mask)
5235 r600_set_bios_scratch_engine_hung(rdev, false);
5236
5237 return 0;
5238}
5239
5240/**
5241 * cik_gfx_is_lockup - check if the 3D engine is locked up
5242 *
5243 * @rdev: radeon_device pointer
5244 * @ring: radeon_ring structure holding ring information
5245 *
5246 * Check if the 3D engine is locked up (CIK).
5247 * Returns true if the engine is locked, false if not.
5248 */
5249bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5250{
5251 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5252
5253 if (!(reset_mask & (RADEON_RESET_GFX |
5254 RADEON_RESET_COMPUTE |
5255 RADEON_RESET_CP))) {
5256 radeon_ring_lockup_update(rdev, ring);
5257 return false;
5258 }
5259 return radeon_ring_test_lockup(rdev, ring);
5260}
5261
5262/* MC */
5263/**
5264 * cik_mc_program - program the GPU memory controller
5265 *
5266 * @rdev: radeon_device pointer
5267 *
5268 * Set the location of vram, gart, and AGP in the GPU's
5269 * physical address space (CIK).
5270 */
5271static void cik_mc_program(struct radeon_device *rdev)
5272{
5273 struct evergreen_mc_save save;
5274 u32 tmp;
5275 int i, j;
5276
5277 /* Initialize HDP */
5278 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5279 WREG32((0x2c14 + j), 0x00000000);
5280 WREG32((0x2c18 + j), 0x00000000);
5281 WREG32((0x2c1c + j), 0x00000000);
5282 WREG32((0x2c20 + j), 0x00000000);
5283 WREG32((0x2c24 + j), 0x00000000);
5284 }
5285 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5286
5287 evergreen_mc_stop(rdev, &save);
5288 if (radeon_mc_wait_for_idle(rdev)) {
5289 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5290 }
5291 /* Lockout access through VGA aperture*/
5292 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5293 /* Update configuration */
5294 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5295 rdev->mc.vram_start >> 12);
5296 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5297 rdev->mc.vram_end >> 12);
5298 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5299 rdev->vram_scratch.gpu_addr >> 12);
5300 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5301 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5302 WREG32(MC_VM_FB_LOCATION, tmp);
5303 /* XXX double check these! */
5304 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5305 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5306 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5307 WREG32(MC_VM_AGP_BASE, 0);
5308 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5309 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5310 if (radeon_mc_wait_for_idle(rdev)) {
5311 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5312 }
5313 evergreen_mc_resume(rdev, &save);
5314 /* we need to own VRAM, so turn off the VGA renderer here
5315 * to stop it overwriting our objects */
5316 rv515_vga_render_disable(rdev);
5317}
5318
5319/**
5320 * cik_mc_init - initialize the memory controller driver params
5321 *
5322 * @rdev: radeon_device pointer
5323 *
5324 * Look up the amount of vram, vram width, and decide how to place
5325 * vram and gart within the GPU's physical address space (CIK).
5326 * Returns 0 for success.
5327 */
5328static int cik_mc_init(struct radeon_device *rdev)
5329{
5330 u32 tmp;
5331 int chansize, numchan;
5332
5333 /* Get VRAM informations */
5334 rdev->mc.vram_is_ddr = true;
5335 tmp = RREG32(MC_ARB_RAMCFG);
5336 if (tmp & CHANSIZE_MASK) {
5337 chansize = 64;
5338 } else {
5339 chansize = 32;
5340 }
5341 tmp = RREG32(MC_SHARED_CHMAP);
5342 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5343 case 0:
5344 default:
5345 numchan = 1;
5346 break;
5347 case 1:
5348 numchan = 2;
5349 break;
5350 case 2:
5351 numchan = 4;
5352 break;
5353 case 3:
5354 numchan = 8;
5355 break;
5356 case 4:
5357 numchan = 3;
5358 break;
5359 case 5:
5360 numchan = 6;
5361 break;
5362 case 6:
5363 numchan = 10;
5364 break;
5365 case 7:
5366 numchan = 12;
5367 break;
5368 case 8:
5369 numchan = 16;
5370 break;
5371 }
5372 rdev->mc.vram_width = numchan * chansize;
5373 /* Could aper size report 0 ? */
5374 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5375 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5376 /* size in MB on si */
5377 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5378 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5379 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5380 si_vram_gtt_location(rdev, &rdev->mc);
5381 radeon_update_bandwidth_info(rdev);
5382
5383 return 0;
5384}
5385
5386/*
5387 * GART
5388 * VMID 0 is the physical GPU addresses as used by the kernel.
5389 * VMIDs 1-15 are used for userspace clients and are handled
5390 * by the radeon vm/hsa code.
5391 */
5392/**
5393 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5394 *
5395 * @rdev: radeon_device pointer
5396 *
5397 * Flush the TLB for the VMID 0 page table (CIK).
5398 */
5399void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5400{
5401 /* flush hdp cache */
5402 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5403
5404 /* bits 0-15 are the VM contexts0-15 */
5405 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5406}
5407
5408/**
5409 * cik_pcie_gart_enable - gart enable
5410 *
5411 * @rdev: radeon_device pointer
5412 *
5413 * This sets up the TLBs, programs the page tables for VMID0,
5414 * sets up the hw for VMIDs 1-15 which are allocated on
5415 * demand, and sets up the global locations for the LDS, GDS,
5416 * and GPUVM for FSA64 clients (CIK).
5417 * Returns 0 for success, errors for failure.
5418 */
5419static int cik_pcie_gart_enable(struct radeon_device *rdev)
5420{
5421 int r, i;
5422
5423 if (rdev->gart.robj == NULL) {
5424 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5425 return -EINVAL;
5426 }
5427 r = radeon_gart_table_vram_pin(rdev);
5428 if (r)
5429 return r;
5430 /* Setup TLB control */
5431 WREG32(MC_VM_MX_L1_TLB_CNTL,
5432 (0xA << 7) |
5433 ENABLE_L1_TLB |
5434 ENABLE_L1_FRAGMENT_PROCESSING |
5435 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5436 ENABLE_ADVANCED_DRIVER_MODEL |
5437 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5438 /* Setup L2 cache */
5439 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5440 ENABLE_L2_FRAGMENT_PROCESSING |
5441 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5442 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5443 EFFECTIVE_L2_QUEUE_SIZE(7) |
5444 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5445 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5446 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5447 BANK_SELECT(4) |
5448 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5449 /* setup context0 */
5450 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5451 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5452 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5453 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5454 (u32)(rdev->dummy_page.addr >> 12));
5455 WREG32(VM_CONTEXT0_CNTL2, 0);
5456 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5457 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5458
5459 WREG32(0x15D4, 0);
5460 WREG32(0x15D8, 0);
5461 WREG32(0x15DC, 0);
5462
5463 /* restore context1-15 */
5464 /* set vm size, must be a multiple of 4 */
5465 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5466 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5467 for (i = 1; i < 16; i++) {
5468 if (i < 8)
5469 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5470 rdev->vm_manager.saved_table_addr[i]);
5471 else
5472 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5473 rdev->vm_manager.saved_table_addr[i]);
5474 }
5475
5476 /* enable context1-15 */
5477 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5478 (u32)(rdev->dummy_page.addr >> 12));
5479 WREG32(VM_CONTEXT1_CNTL2, 4);
5480 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5481 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5482 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5483 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5484 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5485 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5486 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5487 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5488 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5489 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5490 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5491 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5492 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5493 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5494
5495 if (rdev->family == CHIP_KAVERI) {
5496 u32 tmp = RREG32(CHUB_CONTROL);
5497 tmp &= ~BYPASS_VM;
5498 WREG32(CHUB_CONTROL, tmp);
5499 }
5500
5501 /* XXX SH_MEM regs */
5502 /* where to put LDS, scratch, GPUVM in FSA64 space */
5503 mutex_lock(&rdev->srbm_mutex);
5504 for (i = 0; i < 16; i++) {
5505 cik_srbm_select(rdev, 0, 0, 0, i);
5506 /* CP and shaders */
5507 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5508 WREG32(SH_MEM_APE1_BASE, 1);
5509 WREG32(SH_MEM_APE1_LIMIT, 0);
5510 WREG32(SH_MEM_BASES, 0);
5511 /* SDMA GFX */
5512 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5513 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5514 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5515 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5516 /* XXX SDMA RLC - todo */
5517 }
5518 cik_srbm_select(rdev, 0, 0, 0, 0);
5519 mutex_unlock(&rdev->srbm_mutex);
5520
5521 cik_pcie_gart_tlb_flush(rdev);
5522 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5523 (unsigned)(rdev->mc.gtt_size >> 20),
5524 (unsigned long long)rdev->gart.table_addr);
5525 rdev->gart.ready = true;
5526 return 0;
5527}
5528
5529/**
5530 * cik_pcie_gart_disable - gart disable
5531 *
5532 * @rdev: radeon_device pointer
5533 *
5534 * This disables all VM page table (CIK).
5535 */
5536static void cik_pcie_gart_disable(struct radeon_device *rdev)
5537{
5538 unsigned i;
5539
5540 for (i = 1; i < 16; ++i) {
5541 uint32_t reg;
5542 if (i < 8)
5543 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5544 else
5545 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5546 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5547 }
5548
5549 /* Disable all tables */
5550 WREG32(VM_CONTEXT0_CNTL, 0);
5551 WREG32(VM_CONTEXT1_CNTL, 0);
5552 /* Setup TLB control */
5553 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5554 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5555 /* Setup L2 cache */
5556 WREG32(VM_L2_CNTL,
5557 ENABLE_L2_FRAGMENT_PROCESSING |
5558 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5559 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5560 EFFECTIVE_L2_QUEUE_SIZE(7) |
5561 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5562 WREG32(VM_L2_CNTL2, 0);
5563 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5564 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5565 radeon_gart_table_vram_unpin(rdev);
5566}
5567
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK): the GART is disabled
 * first, then the table in vram is freed and the common GART state
 * is finalized.
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5581
5582/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking, so there is nothing to validate in software
 * and this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5595
5596/*
5597 * vm
5598 * VMID 0 is the physical GPU addresses as used by the kernel.
5599 * VMIDs 1-15 are used for userspace clients and are handled
5600 * by the radeon vm/hsa code.
5601 */
5602/**
5603 * cik_vm_init - cik vm init callback
5604 *
5605 * @rdev: radeon_device pointer
5606 *
5607 * Inits cik specific vm parameters (number of VMs, base of vram for
5608 * VMIDs 1-15) (CIK).
5609 * Returns 0 for success.
5610 */
5611int cik_vm_init(struct radeon_device *rdev)
5612{
5613 /*
5614 * number of VMs
5615 * VMID 0 is reserved for System
5616 * radeon graphics/compute will use VMIDs 1-15
5617 */
5618 rdev->vm_manager.nvm = 16;
5619 /* base offset of vram pages */
5620 if (rdev->flags & RADEON_IS_IGP) {
5621 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5622 tmp <<= 22;
5623 rdev->vm_manager.vram_base_offset = tmp;
5624 } else
5625 rdev->vm_manager.vram_base_offset = 0;
5626
5627 return 0;
5628}
5629
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Tear down any asic specific VM setup (CIK).  The body is empty: this
 * callback currently does nothing.
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5640
5641/**
5642 * cik_vm_decode_fault - print human readable fault info
5643 *
5644 * @rdev: radeon_device pointer
5645 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5646 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5647 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5648 *
5649 * Print human readable fault information (CIK).
5650 */
5651static void cik_vm_decode_fault(struct radeon_device *rdev,
5652 u32 status, u32 addr, u32 mc_client)
5653{
5654 u32 mc_id;
5655 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5656 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5657 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5658 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5659
5660 if (rdev->family == CHIP_HAWAII)
5661 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5662 else
5663 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5664
5665 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5666 protections, vmid, addr,
5667 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5668 block, mc_client, mc_id);
5669}
5670
5671/*
5672 * cik_vm_flush - cik vm flush using the CP
5673 *
5674 * Update the page table base and flush the VM TLB
5675 * using the CP (CIK).
5676 */
5677void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5678 unsigned vm_id, uint64_t pd_addr)
5679{
5680 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5681
5682 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5683 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5684 WRITE_DATA_DST_SEL(0)));
5685 if (vm_id < 8) {
5686 radeon_ring_write(ring,
5687 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5688 } else {
5689 radeon_ring_write(ring,
5690 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5691 }
5692 radeon_ring_write(ring, 0);
5693 radeon_ring_write(ring, pd_addr >> 12);
5694
5695 /* update SH_MEM_* regs */
5696 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5697 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5698 WRITE_DATA_DST_SEL(0)));
5699 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5700 radeon_ring_write(ring, 0);
5701 radeon_ring_write(ring, VMID(vm_id));
5702
5703 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5704 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5705 WRITE_DATA_DST_SEL(0)));
5706 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5707 radeon_ring_write(ring, 0);
5708
5709 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5710 radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5711 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5712 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5713
5714 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5715 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5716 WRITE_DATA_DST_SEL(0)));
5717 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5718 radeon_ring_write(ring, 0);
5719 radeon_ring_write(ring, VMID(0));
5720
5721 /* HDP flush */
5722 cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5723
5724 /* bits 0-15 are the VM contexts0-15 */
5725 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5726 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5727 WRITE_DATA_DST_SEL(0)));
5728 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5729 radeon_ring_write(ring, 0);
5730 radeon_ring_write(ring, 1 << vm_id);
5731
5732 /* wait for the invalidate to complete */
5733 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5734 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5735 WAIT_REG_MEM_FUNCTION(0) | /* always */
5736 WAIT_REG_MEM_ENGINE(0))); /* me */
5737 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5738 radeon_ring_write(ring, 0);
5739 radeon_ring_write(ring, 0); /* ref */
5740 radeon_ring_write(ring, 0); /* mask */
5741 radeon_ring_write(ring, 0x20); /* poll interval */
5742
5743 /* compute doesn't have PFP */
5744 if (usepfp) {
5745 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5746 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5747 radeon_ring_write(ring, 0x0);
5748 }
5749}
5750
5751/*
5752 * RLC
5753 * The RLC is a multi-purpose microengine that handles a
5754 * variety of functions, the most important of which is
5755 * the interrupt controller.
5756 */
5757static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5758 bool enable)
5759{
5760 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5761
5762 if (enable)
5763 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5764 else
5765 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5766 WREG32(CP_INT_CNTL_RING0, tmp);
5767}
5768
5769static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5770{
5771 u32 tmp;
5772
5773 tmp = RREG32(RLC_LB_CNTL);
5774 if (enable)
5775 tmp |= LOAD_BALANCE_ENABLE;
5776 else
5777 tmp &= ~LOAD_BALANCE_ENABLE;
5778 WREG32(RLC_LB_CNTL, tmp);
5779}
5780
5781static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5782{
5783 u32 i, j, k;
5784 u32 mask;
5785
5786 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5787 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5788 cik_select_se_sh(rdev, i, j);
5789 for (k = 0; k < rdev->usec_timeout; k++) {
5790 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5791 break;
5792 udelay(1);
5793 }
5794 }
5795 }
5796 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5797
5798 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5799 for (k = 0; k < rdev->usec_timeout; k++) {
5800 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5801 break;
5802 udelay(1);
5803 }
5804}
5805
5806static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5807{
5808 u32 tmp;
5809
5810 tmp = RREG32(RLC_CNTL);
5811 if (tmp != rlc)
5812 WREG32(RLC_CNTL, rlc);
5813}
5814
5815static u32 cik_halt_rlc(struct radeon_device *rdev)
5816{
5817 u32 data, orig;
5818
5819 orig = data = RREG32(RLC_CNTL);
5820
5821 if (data & RLC_ENABLE) {
5822 u32 i;
5823
5824 data &= ~RLC_ENABLE;
5825 WREG32(RLC_CNTL, data);
5826
5827 for (i = 0; i < rdev->usec_timeout; i++) {
5828 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5829 break;
5830 udelay(1);
5831 }
5832
5833 cik_wait_for_rlc_serdes(rdev);
5834 }
5835
5836 return orig;
5837}
5838
5839void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5840{
5841 u32 tmp, i, mask;
5842
5843 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5844 WREG32(RLC_GPR_REG2, tmp);
5845
5846 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5847 for (i = 0; i < rdev->usec_timeout; i++) {
5848 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5849 break;
5850 udelay(1);
5851 }
5852
5853 for (i = 0; i < rdev->usec_timeout; i++) {
5854 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5855 break;
5856 udelay(1);
5857 }
5858}
5859
5860void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5861{
5862 u32 tmp;
5863
5864 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5865 WREG32(RLC_GPR_REG2, tmp);
5866}
5867
5868/**
5869 * cik_rlc_stop - stop the RLC ME
5870 *
5871 * @rdev: radeon_device pointer
5872 *
5873 * Halt the RLC ME (MicroEngine) (CIK).
5874 */
5875static void cik_rlc_stop(struct radeon_device *rdev)
5876{
5877 WREG32(RLC_CNTL, 0);
5878
5879 cik_enable_gui_idle_interrupt(rdev, false);
5880
5881 cik_wait_for_rlc_serdes(rdev);
5882}
5883
5884/**
5885 * cik_rlc_start - start the RLC ME
5886 *
5887 * @rdev: radeon_device pointer
5888 *
5889 * Unhalt the RLC ME (MicroEngine) (CIK).
5890 */
5891static void cik_rlc_start(struct radeon_device *rdev)
5892{
5893 WREG32(RLC_CNTL, RLC_ENABLE);
5894
5895 cik_enable_gui_idle_interrupt(rdev, true);
5896
5897 udelay(50);
5898}
5899
5900/**
5901 * cik_rlc_resume - setup the RLC hw
5902 *
5903 * @rdev: radeon_device pointer
5904 *
5905 * Initialize the RLC registers, load the ucode,
5906 * and start the RLC (CIK).
5907 * Returns 0 for success, -EINVAL if the ucode is not available.
5908 */
5909static int cik_rlc_resume(struct radeon_device *rdev)
5910{
5911 u32 i, size, tmp;
5912
5913 if (!rdev->rlc_fw)
5914 return -EINVAL;
5915
5916 cik_rlc_stop(rdev);
5917
5918 /* disable CG */
5919 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5920 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5921
5922 si_rlc_reset(rdev);
5923
5924 cik_init_pg(rdev);
5925
5926 cik_init_cg(rdev);
5927
5928 WREG32(RLC_LB_CNTR_INIT, 0);
5929 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5930
5931 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5932 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5933 WREG32(RLC_LB_PARAMS, 0x00600408);
5934 WREG32(RLC_LB_CNTL, 0x80000004);
5935
5936 WREG32(RLC_MC_CNTL, 0);
5937 WREG32(RLC_UCODE_CNTL, 0);
5938
5939 if (rdev->new_fw) {
5940 const struct rlc_firmware_header_v1_0 *hdr =
5941 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5942 const __le32 *fw_data = (const __le32 *)
5943 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5944
5945 radeon_ucode_print_rlc_hdr(&hdr->header);
5946
5947 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5948 WREG32(RLC_GPM_UCODE_ADDR, 0);
5949 for (i = 0; i < size; i++)
5950 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5951 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5952 } else {
5953 const __be32 *fw_data;
5954
5955 switch (rdev->family) {
5956 case CHIP_BONAIRE:
5957 case CHIP_HAWAII:
5958 default:
5959 size = BONAIRE_RLC_UCODE_SIZE;
5960 break;
5961 case CHIP_KAVERI:
5962 size = KV_RLC_UCODE_SIZE;
5963 break;
5964 case CHIP_KABINI:
5965 size = KB_RLC_UCODE_SIZE;
5966 break;
5967 case CHIP_MULLINS:
5968 size = ML_RLC_UCODE_SIZE;
5969 break;
5970 }
5971
5972 fw_data = (const __be32 *)rdev->rlc_fw->data;
5973 WREG32(RLC_GPM_UCODE_ADDR, 0);
5974 for (i = 0; i < size; i++)
5975 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5976 WREG32(RLC_GPM_UCODE_ADDR, 0);
5977 }
5978
5979 /* XXX - find out what chips support lbpw */
5980 cik_enable_lbpw(rdev, false);
5981
5982 if (rdev->family == CHIP_BONAIRE)
5983 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5984
5985 cik_rlc_start(rdev);
5986
5987 return 0;
5988}
5989
5990static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5991{
5992 u32 data, orig, tmp, tmp2;
5993
5994 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5995
5996 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5997 cik_enable_gui_idle_interrupt(rdev, true);
5998
5999 tmp = cik_halt_rlc(rdev);
6000
6001 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6002 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6003 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6004 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6005 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6006
6007 cik_update_rlc(rdev, tmp);
6008
6009 data |= CGCG_EN | CGLS_EN;
6010 } else {
6011 cik_enable_gui_idle_interrupt(rdev, false);
6012
6013 RREG32(CB_CGTT_SCLK_CTRL);
6014 RREG32(CB_CGTT_SCLK_CTRL);
6015 RREG32(CB_CGTT_SCLK_CTRL);
6016 RREG32(CB_CGTT_SCLK_CTRL);
6017
6018 data &= ~(CGCG_EN | CGLS_EN);
6019 }
6020
6021 if (orig != data)
6022 WREG32(RLC_CGCG_CGLS_CTRL, data);
6023
6024}
6025
6026static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6027{
6028 u32 data, orig, tmp = 0;
6029
6030 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6031 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6032 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6033 orig = data = RREG32(CP_MEM_SLP_CNTL);
6034 data |= CP_MEM_LS_EN;
6035 if (orig != data)
6036 WREG32(CP_MEM_SLP_CNTL, data);
6037 }
6038 }
6039
6040 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6041 data |= 0x00000001;
6042 data &= 0xfffffffd;
6043 if (orig != data)
6044 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6045
6046 tmp = cik_halt_rlc(rdev);
6047
6048 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6049 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6050 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6051 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6052 WREG32(RLC_SERDES_WR_CTRL, data);
6053
6054 cik_update_rlc(rdev, tmp);
6055
6056 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6057 orig = data = RREG32(CGTS_SM_CTRL_REG);
6058 data &= ~SM_MODE_MASK;
6059 data |= SM_MODE(0x2);
6060 data |= SM_MODE_ENABLE;
6061 data &= ~CGTS_OVERRIDE;
6062 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6063 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6064 data &= ~CGTS_LS_OVERRIDE;
6065 data &= ~ON_MONITOR_ADD_MASK;
6066 data |= ON_MONITOR_ADD_EN;
6067 data |= ON_MONITOR_ADD(0x96);
6068 if (orig != data)
6069 WREG32(CGTS_SM_CTRL_REG, data);
6070 }
6071 } else {
6072 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6073 data |= 0x00000003;
6074 if (orig != data)
6075 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6076
6077 data = RREG32(RLC_MEM_SLP_CNTL);
6078 if (data & RLC_MEM_LS_EN) {
6079 data &= ~RLC_MEM_LS_EN;
6080 WREG32(RLC_MEM_SLP_CNTL, data);
6081 }
6082
6083 data = RREG32(CP_MEM_SLP_CNTL);
6084 if (data & CP_MEM_LS_EN) {
6085 data &= ~CP_MEM_LS_EN;
6086 WREG32(CP_MEM_SLP_CNTL, data);
6087 }
6088
6089 orig = data = RREG32(CGTS_SM_CTRL_REG);
6090 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6091 if (orig != data)
6092 WREG32(CGTS_SM_CTRL_REG, data);
6093
6094 tmp = cik_halt_rlc(rdev);
6095
6096 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6097 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6098 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6099 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6100 WREG32(RLC_SERDES_WR_CTRL, data);
6101
6102 cik_update_rlc(rdev, tmp);
6103 }
6104}
6105
6106static const u32 mc_cg_registers[] =
6107{
6108 MC_HUB_MISC_HUB_CG,
6109 MC_HUB_MISC_SIP_CG,
6110 MC_HUB_MISC_VM_CG,
6111 MC_XPB_CLK_GAT,
6112 ATC_MISC_CG,
6113 MC_CITF_MISC_WR_CG,
6114 MC_CITF_MISC_RD_CG,
6115 MC_CITF_MISC_VM_CG,
6116 VM_L2_CG,
6117};
6118
6119static void cik_enable_mc_ls(struct radeon_device *rdev,
6120 bool enable)
6121{
6122 int i;
6123 u32 orig, data;
6124
6125 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6126 orig = data = RREG32(mc_cg_registers[i]);
6127 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6128 data |= MC_LS_ENABLE;
6129 else
6130 data &= ~MC_LS_ENABLE;
6131 if (data != orig)
6132 WREG32(mc_cg_registers[i], data);
6133 }
6134}
6135
6136static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6137 bool enable)
6138{
6139 int i;
6140 u32 orig, data;
6141
6142 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6143 orig = data = RREG32(mc_cg_registers[i]);
6144 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6145 data |= MC_CG_ENABLE;
6146 else
6147 data &= ~MC_CG_ENABLE;
6148 if (data != orig)
6149 WREG32(mc_cg_registers[i], data);
6150 }
6151}
6152
6153static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6154 bool enable)
6155{
6156 u32 orig, data;
6157
6158 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6159 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6160 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6161 } else {
6162 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6163 data |= 0xff000000;
6164 if (data != orig)
6165 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6166
6167 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6168 data |= 0xff000000;
6169 if (data != orig)
6170 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6171 }
6172}
6173
6174static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6175 bool enable)
6176{
6177 u32 orig, data;
6178
6179 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6180 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6181 data |= 0x100;
6182 if (orig != data)
6183 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6184
6185 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6186 data |= 0x100;
6187 if (orig != data)
6188 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6189 } else {
6190 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6191 data &= ~0x100;
6192 if (orig != data)
6193 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6194
6195 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6196 data &= ~0x100;
6197 if (orig != data)
6198 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6199 }
6200}
6201
6202static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6203 bool enable)
6204{
6205 u32 orig, data;
6206
6207 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6208 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6209 data = 0xfff;
6210 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6211
6212 orig = data = RREG32(UVD_CGC_CTRL);
6213 data |= DCM;
6214 if (orig != data)
6215 WREG32(UVD_CGC_CTRL, data);
6216 } else {
6217 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6218 data &= ~0xfff;
6219 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6220
6221 orig = data = RREG32(UVD_CGC_CTRL);
6222 data &= ~DCM;
6223 if (orig != data)
6224 WREG32(UVD_CGC_CTRL, data);
6225 }
6226}
6227
6228static void cik_enable_bif_mgls(struct radeon_device *rdev,
6229 bool enable)
6230{
6231 u32 orig, data;
6232
6233 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6234
6235 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6236 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6237 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6238 else
6239 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6240 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6241
6242 if (orig != data)
6243 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6244}
6245
6246static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6247 bool enable)
6248{
6249 u32 orig, data;
6250
6251 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6252
6253 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6254 data &= ~CLOCK_GATING_DIS;
6255 else
6256 data |= CLOCK_GATING_DIS;
6257
6258 if (orig != data)
6259 WREG32(HDP_HOST_PATH_CNTL, data);
6260}
6261
6262static void cik_enable_hdp_ls(struct radeon_device *rdev,
6263 bool enable)
6264{
6265 u32 orig, data;
6266
6267 orig = data = RREG32(HDP_MEM_POWER_LS);
6268
6269 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6270 data |= HDP_LS_ENABLE;
6271 else
6272 data &= ~HDP_LS_ENABLE;
6273
6274 if (orig != data)
6275 WREG32(HDP_MEM_POWER_LS, data);
6276}
6277
6278void cik_update_cg(struct radeon_device *rdev,
6279 u32 block, bool enable)
6280{
6281
6282 if (block & RADEON_CG_BLOCK_GFX) {
6283 cik_enable_gui_idle_interrupt(rdev, false);
6284 /* order matters! */
6285 if (enable) {
6286 cik_enable_mgcg(rdev, true);
6287 cik_enable_cgcg(rdev, true);
6288 } else {
6289 cik_enable_cgcg(rdev, false);
6290 cik_enable_mgcg(rdev, false);
6291 }
6292 cik_enable_gui_idle_interrupt(rdev, true);
6293 }
6294
6295 if (block & RADEON_CG_BLOCK_MC) {
6296 if (!(rdev->flags & RADEON_IS_IGP)) {
6297 cik_enable_mc_mgcg(rdev, enable);
6298 cik_enable_mc_ls(rdev, enable);
6299 }
6300 }
6301
6302 if (block & RADEON_CG_BLOCK_SDMA) {
6303 cik_enable_sdma_mgcg(rdev, enable);
6304 cik_enable_sdma_mgls(rdev, enable);
6305 }
6306
6307 if (block & RADEON_CG_BLOCK_BIF) {
6308 cik_enable_bif_mgls(rdev, enable);
6309 }
6310
6311 if (block & RADEON_CG_BLOCK_UVD) {
6312 if (rdev->has_uvd)
6313 cik_enable_uvd_mgcg(rdev, enable);
6314 }
6315
6316 if (block & RADEON_CG_BLOCK_HDP) {
6317 cik_enable_hdp_mgcg(rdev, enable);
6318 cik_enable_hdp_ls(rdev, enable);
6319 }
6320
6321 if (block & RADEON_CG_BLOCK_VCE) {
6322 vce_v2_0_enable_mgcg(rdev, enable);
6323 }
6324}
6325
6326static void cik_init_cg(struct radeon_device *rdev)
6327{
6328
6329 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6330
6331 if (rdev->has_uvd)
6332 si_init_uvd_internal_cg(rdev);
6333
6334 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6335 RADEON_CG_BLOCK_SDMA |
6336 RADEON_CG_BLOCK_BIF |
6337 RADEON_CG_BLOCK_UVD |
6338 RADEON_CG_BLOCK_HDP), true);
6339}
6340
6341static void cik_fini_cg(struct radeon_device *rdev)
6342{
6343 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6344 RADEON_CG_BLOCK_SDMA |
6345 RADEON_CG_BLOCK_BIF |
6346 RADEON_CG_BLOCK_UVD |
6347 RADEON_CG_BLOCK_HDP), false);
6348
6349 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6350}
6351
6352static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6353 bool enable)
6354{
6355 u32 data, orig;
6356
6357 orig = data = RREG32(RLC_PG_CNTL);
6358 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6359 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6360 else
6361 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6362 if (orig != data)
6363 WREG32(RLC_PG_CNTL, data);
6364}
6365
6366static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6367 bool enable)
6368{
6369 u32 data, orig;
6370
6371 orig = data = RREG32(RLC_PG_CNTL);
6372 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6373 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6374 else
6375 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6376 if (orig != data)
6377 WREG32(RLC_PG_CNTL, data);
6378}
6379
6380static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6381{
6382 u32 data, orig;
6383
6384 orig = data = RREG32(RLC_PG_CNTL);
6385 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6386 data &= ~DISABLE_CP_PG;
6387 else
6388 data |= DISABLE_CP_PG;
6389 if (orig != data)
6390 WREG32(RLC_PG_CNTL, data);
6391}
6392
6393static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6394{
6395 u32 data, orig;
6396
6397 orig = data = RREG32(RLC_PG_CNTL);
6398 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6399 data &= ~DISABLE_GDS_PG;
6400 else
6401 data |= DISABLE_GDS_PG;
6402 if (orig != data)
6403 WREG32(RLC_PG_CNTL, data);
6404}
6405
6406#define CP_ME_TABLE_SIZE 96
6407#define CP_ME_TABLE_OFFSET 2048
6408#define CP_MEC_TABLE_OFFSET 4096
6409
6410void cik_init_cp_pg_table(struct radeon_device *rdev)
6411{
6412 volatile u32 *dst_ptr;
6413 int me, i, max_me = 4;
6414 u32 bo_offset = 0;
6415 u32 table_offset, table_size;
6416
6417 if (rdev->family == CHIP_KAVERI)
6418 max_me = 5;
6419
6420 if (rdev->rlc.cp_table_ptr == NULL)
6421 return;
6422
6423 /* write the cp table buffer */
6424 dst_ptr = rdev->rlc.cp_table_ptr;
6425 for (me = 0; me < max_me; me++) {
6426 if (rdev->new_fw) {
6427 const __le32 *fw_data;
6428 const struct gfx_firmware_header_v1_0 *hdr;
6429
6430 if (me == 0) {
6431 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6432 fw_data = (const __le32 *)
6433 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6434 table_offset = le32_to_cpu(hdr->jt_offset);
6435 table_size = le32_to_cpu(hdr->jt_size);
6436 } else if (me == 1) {
6437 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6438 fw_data = (const __le32 *)
6439 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6440 table_offset = le32_to_cpu(hdr->jt_offset);
6441 table_size = le32_to_cpu(hdr->jt_size);
6442 } else if (me == 2) {
6443 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6444 fw_data = (const __le32 *)
6445 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6446 table_offset = le32_to_cpu(hdr->jt_offset);
6447 table_size = le32_to_cpu(hdr->jt_size);
6448 } else if (me == 3) {
6449 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6450 fw_data = (const __le32 *)
6451 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6452 table_offset = le32_to_cpu(hdr->jt_offset);
6453 table_size = le32_to_cpu(hdr->jt_size);
6454 } else {
6455 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6456 fw_data = (const __le32 *)
6457 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6458 table_offset = le32_to_cpu(hdr->jt_offset);
6459 table_size = le32_to_cpu(hdr->jt_size);
6460 }
6461
6462 for (i = 0; i < table_size; i ++) {
6463 dst_ptr[bo_offset + i] =
6464 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6465 }
6466 bo_offset += table_size;
6467 } else {
6468 const __be32 *fw_data;
6469 table_size = CP_ME_TABLE_SIZE;
6470
6471 if (me == 0) {
6472 fw_data = (const __be32 *)rdev->ce_fw->data;
6473 table_offset = CP_ME_TABLE_OFFSET;
6474 } else if (me == 1) {
6475 fw_data = (const __be32 *)rdev->pfp_fw->data;
6476 table_offset = CP_ME_TABLE_OFFSET;
6477 } else if (me == 2) {
6478 fw_data = (const __be32 *)rdev->me_fw->data;
6479 table_offset = CP_ME_TABLE_OFFSET;
6480 } else {
6481 fw_data = (const __be32 *)rdev->mec_fw->data;
6482 table_offset = CP_MEC_TABLE_OFFSET;
6483 }
6484
6485 for (i = 0; i < table_size; i ++) {
6486 dst_ptr[bo_offset + i] =
6487 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6488 }
6489 bo_offset += table_size;
6490 }
6491 }
6492}
6493
6494static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6495 bool enable)
6496{
6497 u32 data, orig;
6498
6499 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6500 orig = data = RREG32(RLC_PG_CNTL);
6501 data |= GFX_PG_ENABLE;
6502 if (orig != data)
6503 WREG32(RLC_PG_CNTL, data);
6504
6505 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6506 data |= AUTO_PG_EN;
6507 if (orig != data)
6508 WREG32(RLC_AUTO_PG_CTRL, data);
6509 } else {
6510 orig = data = RREG32(RLC_PG_CNTL);
6511 data &= ~GFX_PG_ENABLE;
6512 if (orig != data)
6513 WREG32(RLC_PG_CNTL, data);
6514
6515 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6516 data &= ~AUTO_PG_EN;
6517 if (orig != data)
6518 WREG32(RLC_AUTO_PG_CTRL, data);
6519
6520 data = RREG32(DB_RENDER_CONTROL);
6521 }
6522}
6523
6524static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6525{
6526 u32 mask = 0, tmp, tmp1;
6527 int i;
6528
6529 cik_select_se_sh(rdev, se, sh);
6530 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6531 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6532 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6533
6534 tmp &= 0xffff0000;
6535
6536 tmp |= tmp1;
6537 tmp >>= 16;
6538
6539 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6540 mask <<= 1;
6541 mask |= 1;
6542 }
6543
6544 return (~tmp) & mask;
6545}
6546
6547static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6548{
6549 u32 i, j, k, active_cu_number = 0;
6550 u32 mask, counter, cu_bitmap;
6551 u32 tmp = 0;
6552
6553 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6554 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6555 mask = 1;
6556 cu_bitmap = 0;
6557 counter = 0;
6558 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6559 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6560 if (counter < 2)
6561 cu_bitmap |= mask;
6562 counter ++;
6563 }
6564 mask <<= 1;
6565 }
6566
6567 active_cu_number += counter;
6568 tmp |= (cu_bitmap << (i * 16 + j * 8));
6569 }
6570 }
6571
6572 WREG32(RLC_PG_AO_CU_MASK, tmp);
6573
6574 tmp = RREG32(RLC_MAX_PG_CU);
6575 tmp &= ~MAX_PU_CU_MASK;
6576 tmp |= MAX_PU_CU(active_cu_number);
6577 WREG32(RLC_MAX_PG_CU, tmp);
6578}
6579
6580static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6581 bool enable)
6582{
6583 u32 data, orig;
6584
6585 orig = data = RREG32(RLC_PG_CNTL);
6586 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6587 data |= STATIC_PER_CU_PG_ENABLE;
6588 else
6589 data &= ~STATIC_PER_CU_PG_ENABLE;
6590 if (orig != data)
6591 WREG32(RLC_PG_CNTL, data);
6592}
6593
6594static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6595 bool enable)
6596{
6597 u32 data, orig;
6598
6599 orig = data = RREG32(RLC_PG_CNTL);
6600 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6601 data |= DYN_PER_CU_PG_ENABLE;
6602 else
6603 data &= ~DYN_PER_CU_PG_ENABLE;
6604 if (orig != data)
6605 WREG32(RLC_PG_CNTL, data);
6606}
6607
6608#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6609#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6610
6611static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6612{
6613 u32 data, orig;
6614 u32 i;
6615
6616 if (rdev->rlc.cs_data) {
6617 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6618 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6619 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6620 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6621 } else {
6622 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6623 for (i = 0; i < 3; i++)
6624 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6625 }
6626 if (rdev->rlc.reg_list) {
6627 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6628 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6629 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6630 }
6631
6632 orig = data = RREG32(RLC_PG_CNTL);
6633 data |= GFX_PG_SRC;
6634 if (orig != data)
6635 WREG32(RLC_PG_CNTL, data);
6636
6637 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6638 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6639
6640 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6641 data &= ~IDLE_POLL_COUNT_MASK;
6642 data |= IDLE_POLL_COUNT(0x60);
6643 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6644
6645 data = 0x10101010;
6646 WREG32(RLC_PG_DELAY, data);
6647
6648 data = RREG32(RLC_PG_DELAY_2);
6649 data &= ~0xff;
6650 data |= 0x3;
6651 WREG32(RLC_PG_DELAY_2, data);
6652
6653 data = RREG32(RLC_AUTO_PG_CTRL);
6654 data &= ~GRBM_REG_SGIT_MASK;
6655 data |= GRBM_REG_SGIT(0x700);
6656 WREG32(RLC_AUTO_PG_CTRL, data);
6657
6658}
6659
6660static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6661{
6662 cik_enable_gfx_cgpg(rdev, enable);
6663 cik_enable_gfx_static_mgpg(rdev, enable);
6664 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6665}
6666
6667u32 cik_get_csb_size(struct radeon_device *rdev)
6668{
6669 u32 count = 0;
6670 const struct cs_section_def *sect = NULL;
6671 const struct cs_extent_def *ext = NULL;
6672
6673 if (rdev->rlc.cs_data == NULL)
6674 return 0;
6675
6676 /* begin clear state */
6677 count += 2;
6678 /* context control state */
6679 count += 3;
6680
6681 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6682 for (ext = sect->section; ext->extent != NULL; ++ext) {
6683 if (sect->id == SECT_CONTEXT)
6684 count += 2 + ext->reg_count;
6685 else
6686 return 0;
6687 }
6688 }
6689 /* pa_sc_raster_config/pa_sc_raster_config1 */
6690 count += 4;
6691 /* end clear state */
6692 count += 2;
6693 /* clear state */
6694 count += 2;
6695
6696 return count;
6697}
6698
6699void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6700{
6701 u32 count = 0, i;
6702 const struct cs_section_def *sect = NULL;
6703 const struct cs_extent_def *ext = NULL;
6704
6705 if (rdev->rlc.cs_data == NULL)
6706 return;
6707 if (buffer == NULL)
6708 return;
6709
6710 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6711 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6712
6713 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6714 buffer[count++] = cpu_to_le32(0x80000000);
6715 buffer[count++] = cpu_to_le32(0x80000000);
6716
6717 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6718 for (ext = sect->section; ext->extent != NULL; ++ext) {
6719 if (sect->id == SECT_CONTEXT) {
6720 buffer[count++] =
6721 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6722 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6723 for (i = 0; i < ext->reg_count; i++)
6724 buffer[count++] = cpu_to_le32(ext->extent[i]);
6725 } else {
6726 return;
6727 }
6728 }
6729 }
6730
6731 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6732 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6733 switch (rdev->family) {
6734 case CHIP_BONAIRE:
6735 buffer[count++] = cpu_to_le32(0x16000012);
6736 buffer[count++] = cpu_to_le32(0x00000000);
6737 break;
6738 case CHIP_KAVERI:
6739 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6740 buffer[count++] = cpu_to_le32(0x00000000);
6741 break;
6742 case CHIP_KABINI:
6743 case CHIP_MULLINS:
6744 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6745 buffer[count++] = cpu_to_le32(0x00000000);
6746 break;
6747 case CHIP_HAWAII:
6748 buffer[count++] = cpu_to_le32(0x3a00161a);
6749 buffer[count++] = cpu_to_le32(0x0000002e);
6750 break;
6751 default:
6752 buffer[count++] = cpu_to_le32(0x00000000);
6753 buffer[count++] = cpu_to_le32(0x00000000);
6754 break;
6755 }
6756
6757 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6758 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6759
6760 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6761 buffer[count++] = cpu_to_le32(0);
6762}
6763
6764static void cik_init_pg(struct radeon_device *rdev)
6765{
6766 if (rdev->pg_flags) {
6767 cik_enable_sck_slowdown_on_pu(rdev, true);
6768 cik_enable_sck_slowdown_on_pd(rdev, true);
6769 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6770 cik_init_gfx_cgpg(rdev);
6771 cik_enable_cp_pg(rdev, true);
6772 cik_enable_gds_pg(rdev, true);
6773 }
6774 cik_init_ao_cu_mask(rdev);
6775 cik_update_gfx_pg(rdev, true);
6776 }
6777}
6778
6779static void cik_fini_pg(struct radeon_device *rdev)
6780{
6781 if (rdev->pg_flags) {
6782 cik_update_gfx_pg(rdev, false);
6783 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6784 cik_enable_cp_pg(rdev, false);
6785 cik_enable_gds_pg(rdev, false);
6786 }
6787 }
6788}
6789
/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU-accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching vectors and processing them until the pointers are
 * equal again, at which point it updates the rptr.
 */
6804
6805/**
6806 * cik_enable_interrupts - Enable the interrupt ring buffer
6807 *
6808 * @rdev: radeon_device pointer
6809 *
6810 * Enable the interrupt ring buffer (CIK).
6811 */
6812static void cik_enable_interrupts(struct radeon_device *rdev)
6813{
6814 u32 ih_cntl = RREG32(IH_CNTL);
6815 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6816
6817 ih_cntl |= ENABLE_INTR;
6818 ih_rb_cntl |= IH_RB_ENABLE;
6819 WREG32(IH_CNTL, ih_cntl);
6820 WREG32(IH_RB_CNTL, ih_rb_cntl);
6821 rdev->ih.enabled = true;
6822}
6823
6824/**
6825 * cik_disable_interrupts - Disable the interrupt ring buffer
6826 *
6827 * @rdev: radeon_device pointer
6828 *
6829 * Disable the interrupt ring buffer (CIK).
6830 */
6831static void cik_disable_interrupts(struct radeon_device *rdev)
6832{
6833 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6834 u32 ih_cntl = RREG32(IH_CNTL);
6835
6836 ih_rb_cntl &= ~IH_RB_ENABLE;
6837 ih_cntl &= ~ENABLE_INTR;
6838 WREG32(IH_RB_CNTL, ih_rb_cntl);
6839 WREG32(IH_CNTL, ih_cntl);
6840 /* set rptr, wptr to 0 */
6841 WREG32(IH_RB_RPTR, 0);
6842 WREG32(IH_RB_WPTR, 0);
6843 rdev->ih.enabled = false;
6844 rdev->ih.rptr = 0;
6845}
6846
6847/**
6848 * cik_disable_interrupt_state - Disable all interrupt sources
6849 *
6850 * @rdev: radeon_device pointer
6851 *
6852 * Clear all interrupt enable bits used by the driver (CIK).
6853 */
6854static void cik_disable_interrupt_state(struct radeon_device *rdev)
6855{
6856 u32 tmp;
6857
6858 /* gfx ring */
6859 tmp = RREG32(CP_INT_CNTL_RING0) &
6860 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6861 WREG32(CP_INT_CNTL_RING0, tmp);
6862 /* sdma */
6863 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6864 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6865 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6866 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6867 /* compute queues */
6868 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6869 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6870 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6871 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6872 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6873 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6874 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6875 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6876 /* grbm */
6877 WREG32(GRBM_INT_CNTL, 0);
6878 /* SRBM */
6879 WREG32(SRBM_INT_CNTL, 0);
6880 /* vline/vblank, etc. */
6881 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6882 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6883 if (rdev->num_crtc >= 4) {
6884 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6885 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6886 }
6887 if (rdev->num_crtc >= 6) {
6888 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6889 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6890 }
6891 /* pflip */
6892 if (rdev->num_crtc >= 2) {
6893 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6894 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6895 }
6896 if (rdev->num_crtc >= 4) {
6897 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6898 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6899 }
6900 if (rdev->num_crtc >= 6) {
6901 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6902 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6903 }
6904
6905 /* dac hotplug */
6906 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6907
6908 /* digital hotplug */
6909 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6910 WREG32(DC_HPD1_INT_CONTROL, tmp);
6911 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6912 WREG32(DC_HPD2_INT_CONTROL, tmp);
6913 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6914 WREG32(DC_HPD3_INT_CONTROL, tmp);
6915 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6916 WREG32(DC_HPD4_INT_CONTROL, tmp);
6917 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6918 WREG32(DC_HPD5_INT_CONTROL, tmp);
6919 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6920 WREG32(DC_HPD6_INT_CONTROL, tmp);
6921
6922}
6923
6924/**
6925 * cik_irq_init - init and enable the interrupt ring
6926 *
6927 * @rdev: radeon_device pointer
6928 *
6929 * Allocate a ring buffer for the interrupt controller,
6930 * enable the RLC, disable interrupts, enable the IH
6931 * ring buffer and enable it (CIK).
6932 * Called at device load and reume.
6933 * Returns 0 for success, errors for failure.
6934 */
6935static int cik_irq_init(struct radeon_device *rdev)
6936{
6937 int ret = 0;
6938 int rb_bufsz;
6939 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6940
6941 /* allocate ring */
6942 ret = r600_ih_ring_alloc(rdev);
6943 if (ret)
6944 return ret;
6945
6946 /* disable irqs */
6947 cik_disable_interrupts(rdev);
6948
6949 /* init rlc */
6950 ret = cik_rlc_resume(rdev);
6951 if (ret) {
6952 r600_ih_ring_fini(rdev);
6953 return ret;
6954 }
6955
6956 /* setup interrupt control */
6957 /* set dummy read address to dummy page address */
6958 WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
6959 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6960 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6961 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6962 */
6963 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6964 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6965 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6966 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6967
6968 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6969 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6970
6971 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6972 IH_WPTR_OVERFLOW_CLEAR |
6973 (rb_bufsz << 1));
6974
6975 if (rdev->wb.enabled)
6976 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6977
6978 /* set the writeback address whether it's enabled or not */
6979 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6980 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6981
6982 WREG32(IH_RB_CNTL, ih_rb_cntl);
6983
6984 /* set rptr, wptr to 0 */
6985 WREG32(IH_RB_RPTR, 0);
6986 WREG32(IH_RB_WPTR, 0);
6987
6988 /* Default settings for IH_CNTL (disabled at first) */
6989 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6990 /* RPTR_REARM only works if msi's are enabled */
6991 if (rdev->msi_enabled)
6992 ih_cntl |= RPTR_REARM;
6993 WREG32(IH_CNTL, ih_cntl);
6994
6995 /* force the active interrupt state to all disabled */
6996 cik_disable_interrupt_state(rdev);
6997
6998 pci_set_master(rdev->pdev);
6999
7000 /* enable irqs */
7001 cik_enable_interrupts(rdev);
7002
7003 return ret;
7004}
7005
7006/**
7007 * cik_irq_set - enable/disable interrupt sources
7008 *
7009 * @rdev: radeon_device pointer
7010 *
7011 * Enable interrupt sources on the GPU (vblanks, hpd,
7012 * etc.) (CIK).
7013 * Returns 0 for success, errors for failure.
7014 */
7015int cik_irq_set(struct radeon_device *rdev)
7016{
7017 u32 cp_int_cntl;
7018 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7019 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7020 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7021 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7022 u32 grbm_int_cntl = 0;
7023 u32 dma_cntl, dma_cntl1;
7024
7025 if (!rdev->irq.installed) {
7026 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7027 return -EINVAL;
7028 }
7029 /* don't enable anything if the ih is disabled */
7030 if (!rdev->ih.enabled) {
7031 cik_disable_interrupts(rdev);
7032 /* force the active interrupt state to all disabled */
7033 cik_disable_interrupt_state(rdev);
7034 return 0;
7035 }
7036
7037 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7038 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7039 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7040
7041 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7042 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7043 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7044 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7045 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7046 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7047
7048 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7049 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7050
7051 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7052 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7053 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7054 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7055 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7056 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7057 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7058 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7059
7060 /* enable CP interrupts on all rings */
7061 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7062 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7063 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7064 }
7065 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7066 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7067 DRM_DEBUG("si_irq_set: sw int cp1\n");
7068 if (ring->me == 1) {
7069 switch (ring->pipe) {
7070 case 0:
7071 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7072 break;
7073 case 1:
7074 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7075 break;
7076 case 2:
7077 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7078 break;
7079 case 3:
7080 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7081 break;
7082 default:
7083 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7084 break;
7085 }
7086 } else if (ring->me == 2) {
7087 switch (ring->pipe) {
7088 case 0:
7089 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7090 break;
7091 case 1:
7092 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7093 break;
7094 case 2:
7095 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7096 break;
7097 case 3:
7098 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7099 break;
7100 default:
7101 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7102 break;
7103 }
7104 } else {
7105 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7106 }
7107 }
7108 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7109 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7110 DRM_DEBUG("si_irq_set: sw int cp2\n");
7111 if (ring->me == 1) {
7112 switch (ring->pipe) {
7113 case 0:
7114 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7115 break;
7116 case 1:
7117 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7118 break;
7119 case 2:
7120 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7121 break;
7122 case 3:
7123 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7124 break;
7125 default:
7126 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7127 break;
7128 }
7129 } else if (ring->me == 2) {
7130 switch (ring->pipe) {
7131 case 0:
7132 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7133 break;
7134 case 1:
7135 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7136 break;
7137 case 2:
7138 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7139 break;
7140 case 3:
7141 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7142 break;
7143 default:
7144 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7145 break;
7146 }
7147 } else {
7148 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7149 }
7150 }
7151
7152 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7153 DRM_DEBUG("cik_irq_set: sw int dma\n");
7154 dma_cntl |= TRAP_ENABLE;
7155 }
7156
7157 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7158 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7159 dma_cntl1 |= TRAP_ENABLE;
7160 }
7161
7162 if (rdev->irq.crtc_vblank_int[0] ||
7163 atomic_read(&rdev->irq.pflip[0])) {
7164 DRM_DEBUG("cik_irq_set: vblank 0\n");
7165 crtc1 |= VBLANK_INTERRUPT_MASK;
7166 }
7167 if (rdev->irq.crtc_vblank_int[1] ||
7168 atomic_read(&rdev->irq.pflip[1])) {
7169 DRM_DEBUG("cik_irq_set: vblank 1\n");
7170 crtc2 |= VBLANK_INTERRUPT_MASK;
7171 }
7172 if (rdev->irq.crtc_vblank_int[2] ||
7173 atomic_read(&rdev->irq.pflip[2])) {
7174 DRM_DEBUG("cik_irq_set: vblank 2\n");
7175 crtc3 |= VBLANK_INTERRUPT_MASK;
7176 }
7177 if (rdev->irq.crtc_vblank_int[3] ||
7178 atomic_read(&rdev->irq.pflip[3])) {
7179 DRM_DEBUG("cik_irq_set: vblank 3\n");
7180 crtc4 |= VBLANK_INTERRUPT_MASK;
7181 }
7182 if (rdev->irq.crtc_vblank_int[4] ||
7183 atomic_read(&rdev->irq.pflip[4])) {
7184 DRM_DEBUG("cik_irq_set: vblank 4\n");
7185 crtc5 |= VBLANK_INTERRUPT_MASK;
7186 }
7187 if (rdev->irq.crtc_vblank_int[5] ||
7188 atomic_read(&rdev->irq.pflip[5])) {
7189 DRM_DEBUG("cik_irq_set: vblank 5\n");
7190 crtc6 |= VBLANK_INTERRUPT_MASK;
7191 }
7192 if (rdev->irq.hpd[0]) {
7193 DRM_DEBUG("cik_irq_set: hpd 1\n");
7194 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7195 }
7196 if (rdev->irq.hpd[1]) {
7197 DRM_DEBUG("cik_irq_set: hpd 2\n");
7198 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7199 }
7200 if (rdev->irq.hpd[2]) {
7201 DRM_DEBUG("cik_irq_set: hpd 3\n");
7202 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7203 }
7204 if (rdev->irq.hpd[3]) {
7205 DRM_DEBUG("cik_irq_set: hpd 4\n");
7206 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7207 }
7208 if (rdev->irq.hpd[4]) {
7209 DRM_DEBUG("cik_irq_set: hpd 5\n");
7210 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7211 }
7212 if (rdev->irq.hpd[5]) {
7213 DRM_DEBUG("cik_irq_set: hpd 6\n");
7214 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7215 }
7216
7217 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7218
7219 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7220 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7221
7222 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7223 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7224 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7225 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7226 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7227 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7228 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7229 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7230
7231 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7232
7233 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7234 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7235 if (rdev->num_crtc >= 4) {
7236 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7237 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7238 }
7239 if (rdev->num_crtc >= 6) {
7240 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7241 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7242 }
7243
7244 if (rdev->num_crtc >= 2) {
7245 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7246 GRPH_PFLIP_INT_MASK);
7247 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7248 GRPH_PFLIP_INT_MASK);
7249 }
7250 if (rdev->num_crtc >= 4) {
7251 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7252 GRPH_PFLIP_INT_MASK);
7253 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7254 GRPH_PFLIP_INT_MASK);
7255 }
7256 if (rdev->num_crtc >= 6) {
7257 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7258 GRPH_PFLIP_INT_MASK);
7259 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260 GRPH_PFLIP_INT_MASK);
7261 }
7262
7263 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7264 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7265 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7266 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7267 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7268 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7269
7270 /* posting read */
7271 RREG32(SRBM_STATUS);
7272
7273 return 0;
7274}
7275
7276/**
7277 * cik_irq_ack - ack interrupt sources
7278 *
7279 * @rdev: radeon_device pointer
7280 *
7281 * Ack interrupt sources on the GPU (vblanks, hpd,
7282 * etc.) (CIK). Certain interrupts sources are sw
7283 * generated and do not require an explicit ack.
7284 */
7285static inline void cik_irq_ack(struct radeon_device *rdev)
7286{
7287 u32 tmp;
7288
7289 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7290 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7291 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7292 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7293 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7294 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7295 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7296
7297 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7298 EVERGREEN_CRTC0_REGISTER_OFFSET);
7299 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7300 EVERGREEN_CRTC1_REGISTER_OFFSET);
7301 if (rdev->num_crtc >= 4) {
7302 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7303 EVERGREEN_CRTC2_REGISTER_OFFSET);
7304 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7305 EVERGREEN_CRTC3_REGISTER_OFFSET);
7306 }
7307 if (rdev->num_crtc >= 6) {
7308 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7309 EVERGREEN_CRTC4_REGISTER_OFFSET);
7310 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7311 EVERGREEN_CRTC5_REGISTER_OFFSET);
7312 }
7313
7314 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7315 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7316 GRPH_PFLIP_INT_CLEAR);
7317 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7318 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7319 GRPH_PFLIP_INT_CLEAR);
7320 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7321 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7322 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7323 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7324 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7325 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7326 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7327 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7328
7329 if (rdev->num_crtc >= 4) {
7330 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7331 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7332 GRPH_PFLIP_INT_CLEAR);
7333 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7334 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7335 GRPH_PFLIP_INT_CLEAR);
7336 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7338 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7340 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7341 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7342 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7343 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7344 }
7345
7346 if (rdev->num_crtc >= 6) {
7347 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7348 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7349 GRPH_PFLIP_INT_CLEAR);
7350 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7351 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7352 GRPH_PFLIP_INT_CLEAR);
7353 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7354 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7355 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7356 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7357 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7358 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7359 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7360 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7361 }
7362
7363 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7364 tmp = RREG32(DC_HPD1_INT_CONTROL);
7365 tmp |= DC_HPDx_INT_ACK;
7366 WREG32(DC_HPD1_INT_CONTROL, tmp);
7367 }
7368 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7369 tmp = RREG32(DC_HPD2_INT_CONTROL);
7370 tmp |= DC_HPDx_INT_ACK;
7371 WREG32(DC_HPD2_INT_CONTROL, tmp);
7372 }
7373 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7374 tmp = RREG32(DC_HPD3_INT_CONTROL);
7375 tmp |= DC_HPDx_INT_ACK;
7376 WREG32(DC_HPD3_INT_CONTROL, tmp);
7377 }
7378 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7379 tmp = RREG32(DC_HPD4_INT_CONTROL);
7380 tmp |= DC_HPDx_INT_ACK;
7381 WREG32(DC_HPD4_INT_CONTROL, tmp);
7382 }
7383 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7384 tmp = RREG32(DC_HPD5_INT_CONTROL);
7385 tmp |= DC_HPDx_INT_ACK;
7386 WREG32(DC_HPD5_INT_CONTROL, tmp);
7387 }
7388 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7389 tmp = RREG32(DC_HPD6_INT_CONTROL);
7390 tmp |= DC_HPDx_INT_ACK;
7391 WREG32(DC_HPD6_INT_CONTROL, tmp);
7392 }
7393 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7394 tmp = RREG32(DC_HPD1_INT_CONTROL);
7395 tmp |= DC_HPDx_RX_INT_ACK;
7396 WREG32(DC_HPD1_INT_CONTROL, tmp);
7397 }
7398 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7399 tmp = RREG32(DC_HPD2_INT_CONTROL);
7400 tmp |= DC_HPDx_RX_INT_ACK;
7401 WREG32(DC_HPD2_INT_CONTROL, tmp);
7402 }
7403 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7404 tmp = RREG32(DC_HPD3_INT_CONTROL);
7405 tmp |= DC_HPDx_RX_INT_ACK;
7406 WREG32(DC_HPD3_INT_CONTROL, tmp);
7407 }
7408 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7409 tmp = RREG32(DC_HPD4_INT_CONTROL);
7410 tmp |= DC_HPDx_RX_INT_ACK;
7411 WREG32(DC_HPD4_INT_CONTROL, tmp);
7412 }
7413 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7414 tmp = RREG32(DC_HPD5_INT_CONTROL);
7415 tmp |= DC_HPDx_RX_INT_ACK;
7416 WREG32(DC_HPD5_INT_CONTROL, tmp);
7417 }
7418 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7419 tmp = RREG32(DC_HPD6_INT_CONTROL);
7420 tmp |= DC_HPDx_RX_INT_ACK;
7421 WREG32(DC_HPD6_INT_CONTROL, tmp);
7422 }
7423}
7424
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw, wait 1 ms, ack whatever is still
 * pending and force all interrupt sources to disabled (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7440
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts first, then stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down before the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7454
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Run the suspend path (disable interrupts, stop the RLC) and
 * then free the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* the ring is released only after the suspend path has run */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7469
7470/**
7471 * cik_get_ih_wptr - get the IH ring buffer wptr
7472 *
7473 * @rdev: radeon_device pointer
7474 *
7475 * Get the IH ring buffer wptr from either the register
7476 * or the writeback memory buffer (CIK). Also check for
7477 * ring buffer overflow and deal with it.
7478 * Used by cik_irq_process().
7479 * Returns the value of the wptr.
7480 */
7481static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7482{
7483 u32 wptr, tmp;
7484
7485 if (rdev->wb.enabled)
7486 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7487 else
7488 wptr = RREG32(IH_RB_WPTR);
7489
7490 if (wptr & RB_OVERFLOW) {
7491 wptr &= ~RB_OVERFLOW;
7492 /* When a ring buffer overflow happen start parsing interrupt
7493 * from the last not overwritten vector (wptr + 16). Hopefully
7494 * this should allow us to catchup.
7495 */
7496 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7497 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7498 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7499 tmp = RREG32(IH_RB_CNTL);
7500 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7501 WREG32(IH_RB_CNTL, tmp);
7502 }
7503 return (wptr & rdev->ih.ptr_mask);
7504}
7505
7506/* CIK IV Ring
7507 * Each IV ring entry is 128 bits:
7508 * [7:0] - interrupt source id
7509 * [31:8] - reserved
7510 * [59:32] - interrupt source data
7511 * [63:60] - reserved
7512 * [71:64] - RINGID
7513 * CP:
7514 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7515 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7516 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7517 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7518 * PIPE_ID - ME0 0=3D
7519 * - ME1&2 compute dispatcher (4 pipes each)
7520 * SDMA:
7521 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7522 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7523 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7524 * [79:72] - VMID
7525 * [95:80] - PASID
7526 * [127:96] - reserved
7527 */
7528/**
7529 * cik_irq_process - interrupt handler
7530 *
7531 * @rdev: radeon_device pointer
7532 *
 * Interrupt handler (CIK). Walk the IH ring,
7534 * ack interrupts and schedule work to handle
7535 * interrupt events.
7536 * Returns irq process return code.
7537 */
7538int cik_irq_process(struct radeon_device *rdev)
7539{
7540 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7541 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7542 u32 wptr;
7543 u32 rptr;
7544 u32 src_id, src_data, ring_id;
7545 u8 me_id, pipe_id, queue_id;
7546 u32 ring_index;
7547 bool queue_hotplug = false;
7548 bool queue_dp = false;
7549 bool queue_reset = false;
7550 u32 addr, status, mc_client;
7551 bool queue_thermal = false;
7552
7553 if (!rdev->ih.enabled || rdev->shutdown)
7554 return IRQ_NONE;
7555
7556 wptr = cik_get_ih_wptr(rdev);
7557
7558restart_ih:
7559 /* is somebody else already processing irqs? */
7560 if (atomic_xchg(&rdev->ih.lock, 1))
7561 return IRQ_NONE;
7562
7563 rptr = rdev->ih.rptr;
7564 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7565
7566 /* Order reading of wptr vs. reading of IH ring data */
7567 rmb();
7568
7569 /* display interrupts */
7570 cik_irq_ack(rdev);
7571
7572 while (rptr != wptr) {
7573 /* wptr/rptr are in bytes! */
7574 ring_index = rptr / 4;
7575
7576 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7577 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7578 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7579
7580 switch (src_id) {
7581 case 1: /* D1 vblank/vline */
7582 switch (src_data) {
7583 case 0: /* D1 vblank */
7584 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7585 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7586
7587 if (rdev->irq.crtc_vblank_int[0]) {
7588 drm_handle_vblank(rdev_to_drm(rdev), 0);
7589 rdev->pm.vblank_sync = true;
7590 wake_up(&rdev->irq.vblank_queue);
7591 }
7592 if (atomic_read(&rdev->irq.pflip[0]))
7593 radeon_crtc_handle_vblank(rdev, 0);
7594 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7595 DRM_DEBUG("IH: D1 vblank\n");
7596
7597 break;
7598 case 1: /* D1 vline */
7599 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7600 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7603 DRM_DEBUG("IH: D1 vline\n");
7604
7605 break;
7606 default:
7607 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7608 break;
7609 }
7610 break;
7611 case 2: /* D2 vblank/vline */
7612 switch (src_data) {
7613 case 0: /* D2 vblank */
7614 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7615 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617 if (rdev->irq.crtc_vblank_int[1]) {
7618 drm_handle_vblank(rdev_to_drm(rdev), 1);
7619 rdev->pm.vblank_sync = true;
7620 wake_up(&rdev->irq.vblank_queue);
7621 }
7622 if (atomic_read(&rdev->irq.pflip[1]))
7623 radeon_crtc_handle_vblank(rdev, 1);
7624 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7625 DRM_DEBUG("IH: D2 vblank\n");
7626
7627 break;
7628 case 1: /* D2 vline */
7629 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7630 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7633 DRM_DEBUG("IH: D2 vline\n");
7634
7635 break;
7636 default:
7637 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7638 break;
7639 }
7640 break;
7641 case 3: /* D3 vblank/vline */
7642 switch (src_data) {
7643 case 0: /* D3 vblank */
7644 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7645 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647 if (rdev->irq.crtc_vblank_int[2]) {
7648 drm_handle_vblank(rdev_to_drm(rdev), 2);
7649 rdev->pm.vblank_sync = true;
7650 wake_up(&rdev->irq.vblank_queue);
7651 }
7652 if (atomic_read(&rdev->irq.pflip[2]))
7653 radeon_crtc_handle_vblank(rdev, 2);
7654 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7655 DRM_DEBUG("IH: D3 vblank\n");
7656
7657 break;
7658 case 1: /* D3 vline */
7659 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7660 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7663 DRM_DEBUG("IH: D3 vline\n");
7664
7665 break;
7666 default:
7667 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7668 break;
7669 }
7670 break;
7671 case 4: /* D4 vblank/vline */
7672 switch (src_data) {
7673 case 0: /* D4 vblank */
7674 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7675 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677 if (rdev->irq.crtc_vblank_int[3]) {
7678 drm_handle_vblank(rdev_to_drm(rdev), 3);
7679 rdev->pm.vblank_sync = true;
7680 wake_up(&rdev->irq.vblank_queue);
7681 }
7682 if (atomic_read(&rdev->irq.pflip[3]))
7683 radeon_crtc_handle_vblank(rdev, 3);
7684 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7685 DRM_DEBUG("IH: D4 vblank\n");
7686
7687 break;
7688 case 1: /* D4 vline */
7689 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7690 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7693 DRM_DEBUG("IH: D4 vline\n");
7694
7695 break;
7696 default:
7697 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7698 break;
7699 }
7700 break;
7701 case 5: /* D5 vblank/vline */
7702 switch (src_data) {
7703 case 0: /* D5 vblank */
7704 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7705 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707 if (rdev->irq.crtc_vblank_int[4]) {
7708 drm_handle_vblank(rdev_to_drm(rdev), 4);
7709 rdev->pm.vblank_sync = true;
7710 wake_up(&rdev->irq.vblank_queue);
7711 }
7712 if (atomic_read(&rdev->irq.pflip[4]))
7713 radeon_crtc_handle_vblank(rdev, 4);
7714 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7715 DRM_DEBUG("IH: D5 vblank\n");
7716
7717 break;
7718 case 1: /* D5 vline */
7719 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7720 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7723 DRM_DEBUG("IH: D5 vline\n");
7724
7725 break;
7726 default:
7727 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7728 break;
7729 }
7730 break;
7731 case 6: /* D6 vblank/vline */
7732 switch (src_data) {
7733 case 0: /* D6 vblank */
7734 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7735 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737 if (rdev->irq.crtc_vblank_int[5]) {
7738 drm_handle_vblank(rdev_to_drm(rdev), 5);
7739 rdev->pm.vblank_sync = true;
7740 wake_up(&rdev->irq.vblank_queue);
7741 }
7742 if (atomic_read(&rdev->irq.pflip[5]))
7743 radeon_crtc_handle_vblank(rdev, 5);
7744 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7745 DRM_DEBUG("IH: D6 vblank\n");
7746
7747 break;
7748 case 1: /* D6 vline */
7749 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7750 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7753 DRM_DEBUG("IH: D6 vline\n");
7754
7755 break;
7756 default:
7757 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7758 break;
7759 }
7760 break;
7761 case 8: /* D1 page flip */
7762 case 10: /* D2 page flip */
7763 case 12: /* D3 page flip */
7764 case 14: /* D4 page flip */
7765 case 16: /* D5 page flip */
7766 case 18: /* D6 page flip */
7767 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7768 if (radeon_use_pflipirq > 0)
7769 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7770 break;
7771 case 42: /* HPD hotplug */
7772 switch (src_data) {
7773 case 0:
7774 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7775 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7776
7777 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7778 queue_hotplug = true;
7779 DRM_DEBUG("IH: HPD1\n");
7780
7781 break;
7782 case 1:
7783 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7784 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7785
7786 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7787 queue_hotplug = true;
7788 DRM_DEBUG("IH: HPD2\n");
7789
7790 break;
7791 case 2:
7792 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7793 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7794
7795 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7796 queue_hotplug = true;
7797 DRM_DEBUG("IH: HPD3\n");
7798
7799 break;
7800 case 3:
7801 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7802 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7803
7804 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7805 queue_hotplug = true;
7806 DRM_DEBUG("IH: HPD4\n");
7807
7808 break;
7809 case 4:
7810 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7811 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7812
7813 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7814 queue_hotplug = true;
7815 DRM_DEBUG("IH: HPD5\n");
7816
7817 break;
7818 case 5:
7819 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7820 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7821
7822 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7823 queue_hotplug = true;
7824 DRM_DEBUG("IH: HPD6\n");
7825
7826 break;
7827 case 6:
7828 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7829 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7830
7831 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7832 queue_dp = true;
7833 DRM_DEBUG("IH: HPD_RX 1\n");
7834
7835 break;
7836 case 7:
7837 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7838 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7839
7840 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7841 queue_dp = true;
7842 DRM_DEBUG("IH: HPD_RX 2\n");
7843
7844 break;
7845 case 8:
7846 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7847 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7848
7849 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7850 queue_dp = true;
7851 DRM_DEBUG("IH: HPD_RX 3\n");
7852
7853 break;
7854 case 9:
7855 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7856 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7857
7858 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7859 queue_dp = true;
7860 DRM_DEBUG("IH: HPD_RX 4\n");
7861
7862 break;
7863 case 10:
7864 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7865 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7866
7867 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7868 queue_dp = true;
7869 DRM_DEBUG("IH: HPD_RX 5\n");
7870
7871 break;
7872 case 11:
7873 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7874 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7875
7876 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7877 queue_dp = true;
7878 DRM_DEBUG("IH: HPD_RX 6\n");
7879
7880 break;
7881 default:
7882 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7883 break;
7884 }
7885 break;
7886 case 96:
7887 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7888 WREG32(SRBM_INT_ACK, 0x1);
7889 break;
7890 case 124: /* UVD */
7891 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7892 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7893 break;
7894 case 146:
7895 case 147:
7896 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7897 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7898 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7899 /* reset addr and status */
7900 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7901 if (addr == 0x0 && status == 0x0)
7902 break;
7903 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7904 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7905 addr);
7906 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7907 status);
7908 cik_vm_decode_fault(rdev, status, addr, mc_client);
7909 break;
7910 case 167: /* VCE */
7911 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7912 switch (src_data) {
7913 case 0:
7914 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7915 break;
7916 case 1:
7917 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7918 break;
7919 default:
7920 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7921 break;
7922 }
7923 break;
7924 case 176: /* GFX RB CP_INT */
7925 case 177: /* GFX IB CP_INT */
7926 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7927 break;
7928 case 181: /* CP EOP event */
7929 DRM_DEBUG("IH: CP EOP\n");
7930 /* XXX check the bitfield order! */
7931 me_id = (ring_id & 0x60) >> 5;
7932 pipe_id = (ring_id & 0x18) >> 3;
7933 queue_id = (ring_id & 0x7) >> 0;
7934 switch (me_id) {
7935 case 0:
7936 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7937 break;
7938 case 1:
7939 case 2:
7940 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7941 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7942 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7943 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7944 break;
7945 }
7946 break;
7947 case 184: /* CP Privileged reg access */
7948 DRM_ERROR("Illegal register access in command stream\n");
7949 /* XXX check the bitfield order! */
7950 me_id = (ring_id & 0x60) >> 5;
7951 switch (me_id) {
7952 case 0:
7953 /* This results in a full GPU reset, but all we need to do is soft
7954 * reset the CP for gfx
7955 */
7956 queue_reset = true;
7957 break;
7958 case 1:
7959 /* XXX compute */
7960 queue_reset = true;
7961 break;
7962 case 2:
7963 /* XXX compute */
7964 queue_reset = true;
7965 break;
7966 }
7967 break;
7968 case 185: /* CP Privileged inst */
7969 DRM_ERROR("Illegal instruction in command stream\n");
7970 /* XXX check the bitfield order! */
7971 me_id = (ring_id & 0x60) >> 5;
7972 switch (me_id) {
7973 case 0:
7974 /* This results in a full GPU reset, but all we need to do is soft
7975 * reset the CP for gfx
7976 */
7977 queue_reset = true;
7978 break;
7979 case 1:
7980 /* XXX compute */
7981 queue_reset = true;
7982 break;
7983 case 2:
7984 /* XXX compute */
7985 queue_reset = true;
7986 break;
7987 }
7988 break;
7989 case 224: /* SDMA trap event */
7990 /* XXX check the bitfield order! */
7991 me_id = (ring_id & 0x3) >> 0;
7992 queue_id = (ring_id & 0xc) >> 2;
7993 DRM_DEBUG("IH: SDMA trap\n");
7994 switch (me_id) {
7995 case 0:
7996 switch (queue_id) {
7997 case 0:
7998 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7999 break;
8000 case 1:
8001 /* XXX compute */
8002 break;
8003 case 2:
8004 /* XXX compute */
8005 break;
8006 }
8007 break;
8008 case 1:
8009 switch (queue_id) {
8010 case 0:
8011 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8012 break;
8013 case 1:
8014 /* XXX compute */
8015 break;
8016 case 2:
8017 /* XXX compute */
8018 break;
8019 }
8020 break;
8021 }
8022 break;
8023 case 230: /* thermal low to high */
8024 DRM_DEBUG("IH: thermal low to high\n");
8025 rdev->pm.dpm.thermal.high_to_low = false;
8026 queue_thermal = true;
8027 break;
8028 case 231: /* thermal high to low */
8029 DRM_DEBUG("IH: thermal high to low\n");
8030 rdev->pm.dpm.thermal.high_to_low = true;
8031 queue_thermal = true;
8032 break;
8033 case 233: /* GUI IDLE */
8034 DRM_DEBUG("IH: GUI idle\n");
8035 break;
8036 case 241: /* SDMA Privileged inst */
8037 case 247: /* SDMA Privileged inst */
8038 DRM_ERROR("Illegal instruction in SDMA command stream\n");
8039 /* XXX check the bitfield order! */
8040 me_id = (ring_id & 0x3) >> 0;
8041 queue_id = (ring_id & 0xc) >> 2;
8042 switch (me_id) {
8043 case 0:
8044 switch (queue_id) {
8045 case 0:
8046 queue_reset = true;
8047 break;
8048 case 1:
8049 /* XXX compute */
8050 queue_reset = true;
8051 break;
8052 case 2:
8053 /* XXX compute */
8054 queue_reset = true;
8055 break;
8056 }
8057 break;
8058 case 1:
8059 switch (queue_id) {
8060 case 0:
8061 queue_reset = true;
8062 break;
8063 case 1:
8064 /* XXX compute */
8065 queue_reset = true;
8066 break;
8067 case 2:
8068 /* XXX compute */
8069 queue_reset = true;
8070 break;
8071 }
8072 break;
8073 }
8074 break;
8075 default:
8076 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077 break;
8078 }
8079
8080 /* wptr/rptr are in bytes! */
8081 rptr += 16;
8082 rptr &= rdev->ih.ptr_mask;
8083 WREG32(IH_RB_RPTR, rptr);
8084 }
8085 if (queue_dp)
8086 schedule_work(&rdev->dp_work);
8087 if (queue_hotplug)
8088 schedule_delayed_work(&rdev->hotplug_work, 0);
8089 if (queue_reset) {
8090 rdev->needs_reset = true;
8091 wake_up_all(&rdev->fence_queue);
8092 }
8093 if (queue_thermal)
8094 schedule_work(&rdev->pm.dpm.thermal.work);
8095 rdev->ih.rptr = rptr;
8096 atomic_set(&rdev->ih.lock, 0);
8097
8098 /* make sure wptr hasn't changed while processing */
8099 wptr = cik_get_ih_wptr(rdev);
8100 if (wptr != rptr)
8101 goto restart_ih;
8102
8103 return IRQ_HANDLED;
8104}
8105
8106/*
8107 * startup/shutdown callbacks
8108 */
8109static void cik_uvd_init(struct radeon_device *rdev)
8110{
8111 int r;
8112
8113 if (!rdev->has_uvd)
8114 return;
8115
8116 r = radeon_uvd_init(rdev);
8117 if (r) {
8118 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8119 /*
8120 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8121 * to early fails cik_uvd_start() and thus nothing happens
8122 * there. So it is pointless to try to go through that code
8123 * hence why we disable uvd here.
8124 */
8125 rdev->has_uvd = false;
8126 return;
8127 }
8128 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8129 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8130}
8131
8132static void cik_uvd_start(struct radeon_device *rdev)
8133{
8134 int r;
8135
8136 if (!rdev->has_uvd)
8137 return;
8138
8139 r = radeon_uvd_resume(rdev);
8140 if (r) {
8141 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8142 goto error;
8143 }
8144 r = uvd_v4_2_resume(rdev);
8145 if (r) {
8146 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8147 goto error;
8148 }
8149 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8150 if (r) {
8151 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8152 goto error;
8153 }
8154 return;
8155
8156error:
8157 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8158}
8159
8160static void cik_uvd_resume(struct radeon_device *rdev)
8161{
8162 struct radeon_ring *ring;
8163 int r;
8164
8165 if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8166 return;
8167
8168 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8169 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8170 if (r) {
8171 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8172 return;
8173 }
8174 r = uvd_v1_0_init(rdev);
8175 if (r) {
8176 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8177 return;
8178 }
8179}
8180
8181static void cik_vce_init(struct radeon_device *rdev)
8182{
8183 int r;
8184
8185 if (!rdev->has_vce)
8186 return;
8187
8188 r = radeon_vce_init(rdev);
8189 if (r) {
8190 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8191 /*
8192 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8193 * to early fails cik_vce_start() and thus nothing happens
8194 * there. So it is pointless to try to go through that code
8195 * hence why we disable vce here.
8196 */
8197 rdev->has_vce = false;
8198 return;
8199 }
8200 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8201 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8202 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8203 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8204}
8205
8206static void cik_vce_start(struct radeon_device *rdev)
8207{
8208 int r;
8209
8210 if (!rdev->has_vce)
8211 return;
8212
8213 r = radeon_vce_resume(rdev);
8214 if (r) {
8215 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8216 goto error;
8217 }
8218 r = vce_v2_0_resume(rdev);
8219 if (r) {
8220 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8221 goto error;
8222 }
8223 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8224 if (r) {
8225 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8226 goto error;
8227 }
8228 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8229 if (r) {
8230 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8231 goto error;
8232 }
8233 return;
8234
8235error:
8236 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8237 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8238}
8239
8240static void cik_vce_resume(struct radeon_device *rdev)
8241{
8242 struct radeon_ring *ring;
8243 int r;
8244
8245 if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8246 return;
8247
8248 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8249 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8250 if (r) {
8251 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8252 return;
8253 }
8254 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8255 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8256 if (r) {
8257 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8258 return;
8259 }
8260 r = vce_v1_0_init(rdev);
8261 if (r) {
8262 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8263 return;
8264 }
8265}
8266
8267/**
8268 * cik_startup - program the asic to a functional state
8269 *
8270 * @rdev: radeon_device pointer
8271 *
8272 * Programs the asic to a functional state (CIK).
8273 * Called by cik_init() and cik_resume().
8274 * Returns 0 for success, error for failure.
8275 */
8276static int cik_startup(struct radeon_device *rdev)
8277{
8278 struct radeon_ring *ring;
8279 u32 nop;
8280 int r;
8281
8282 /* enable pcie gen2/3 link */
8283 cik_pcie_gen3_enable(rdev);
8284 /* enable aspm */
8285 cik_program_aspm(rdev);
8286
8287 /* scratch needs to be initialized before MC */
8288 r = r600_vram_scratch_init(rdev);
8289 if (r)
8290 return r;
8291
8292 cik_mc_program(rdev);
8293
8294 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8295 r = ci_mc_load_microcode(rdev);
8296 if (r) {
8297 DRM_ERROR("Failed to load MC firmware!\n");
8298 return r;
8299 }
8300 }
8301
8302 r = cik_pcie_gart_enable(rdev);
8303 if (r)
8304 return r;
8305 cik_gpu_init(rdev);
8306
8307 /* allocate rlc buffers */
8308 if (rdev->flags & RADEON_IS_IGP) {
8309 if (rdev->family == CHIP_KAVERI) {
8310 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8311 rdev->rlc.reg_list_size =
8312 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8313 } else {
8314 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8315 rdev->rlc.reg_list_size =
8316 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8317 }
8318 }
8319 rdev->rlc.cs_data = ci_cs_data;
8320 rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8321 rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8322 r = sumo_rlc_init(rdev);
8323 if (r) {
8324 DRM_ERROR("Failed to init rlc BOs!\n");
8325 return r;
8326 }
8327
8328 /* allocate wb buffer */
8329 r = radeon_wb_init(rdev);
8330 if (r)
8331 return r;
8332
8333 /* allocate mec buffers */
8334 r = cik_mec_init(rdev);
8335 if (r) {
8336 DRM_ERROR("Failed to init MEC BOs!\n");
8337 return r;
8338 }
8339
8340 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8341 if (r) {
8342 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8343 return r;
8344 }
8345
8346 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8347 if (r) {
8348 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8349 return r;
8350 }
8351
8352 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8353 if (r) {
8354 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8355 return r;
8356 }
8357
8358 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8359 if (r) {
8360 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8361 return r;
8362 }
8363
8364 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8365 if (r) {
8366 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8367 return r;
8368 }
8369
8370 cik_uvd_start(rdev);
8371 cik_vce_start(rdev);
8372
8373 /* Enable IRQ */
8374 if (!rdev->irq.installed) {
8375 r = radeon_irq_kms_init(rdev);
8376 if (r)
8377 return r;
8378 }
8379
8380 r = cik_irq_init(rdev);
8381 if (r) {
8382 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8383 radeon_irq_kms_fini(rdev);
8384 return r;
8385 }
8386 cik_irq_set(rdev);
8387
8388 if (rdev->family == CHIP_HAWAII) {
8389 if (rdev->new_fw)
8390 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8391 else
8392 nop = RADEON_CP_PACKET2;
8393 } else {
8394 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8395 }
8396
8397 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8398 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8399 nop);
8400 if (r)
8401 return r;
8402
8403 /* set up the compute queues */
8404 /* type-2 packets are deprecated on MEC, use type-3 instead */
8405 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8406 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8407 nop);
8408 if (r)
8409 return r;
8410 ring->me = 1; /* first MEC */
8411 ring->pipe = 0; /* first pipe */
8412 ring->queue = 0; /* first queue */
8413 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8414
8415 /* type-2 packets are deprecated on MEC, use type-3 instead */
8416 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8417 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8418 nop);
8419 if (r)
8420 return r;
8421 /* dGPU only have 1 MEC */
8422 ring->me = 1; /* first MEC */
8423 ring->pipe = 0; /* first pipe */
8424 ring->queue = 1; /* second queue */
8425 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8426
8427 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8428 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8429 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8430 if (r)
8431 return r;
8432
8433 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8434 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8435 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8436 if (r)
8437 return r;
8438
8439 r = cik_cp_resume(rdev);
8440 if (r)
8441 return r;
8442
8443 r = cik_sdma_resume(rdev);
8444 if (r)
8445 return r;
8446
8447 cik_uvd_resume(rdev);
8448 cik_vce_resume(rdev);
8449
8450 r = radeon_ib_pool_init(rdev);
8451 if (r) {
8452 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8453 return r;
8454 }
8455
8456 r = radeon_vm_manager_init(rdev);
8457 if (r) {
8458 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8459 return r;
8460 }
8461
8462 r = radeon_audio_init(rdev);
8463 if (r)
8464 return r;
8465
8466 return 0;
8467}
8468
8469/**
8470 * cik_resume - resume the asic to a functional state
8471 *
8472 * @rdev: radeon_device pointer
8473 *
8474 * Programs the asic to a functional state (CIK).
8475 * Called at resume.
8476 * Returns 0 for success, error for failure.
8477 */
8478int cik_resume(struct radeon_device *rdev)
8479{
8480 int r;
8481
8482 /* post card */
8483 atom_asic_init(rdev->mode_info.atom_context);
8484
8485 /* init golden registers */
8486 cik_init_golden_registers(rdev);
8487
8488 if (rdev->pm.pm_method == PM_METHOD_DPM)
8489 radeon_pm_resume(rdev);
8490
8491 rdev->accel_working = true;
8492 r = cik_startup(rdev);
8493 if (r) {
8494 DRM_ERROR("cik startup failed on resume\n");
8495 rdev->accel_working = false;
8496 return r;
8497 }
8498
8499 return r;
8500
8501}
8502
8503/**
8504 * cik_suspend - suspend the asic
8505 *
8506 * @rdev: radeon_device pointer
8507 *
8508 * Bring the chip into a state suitable for suspend (CIK).
8509 * Called at suspend.
8510 * Returns 0 for success.
8511 */
8512int cik_suspend(struct radeon_device *rdev)
8513{
8514 radeon_pm_suspend(rdev);
8515 radeon_audio_fini(rdev);
8516 radeon_vm_manager_fini(rdev);
8517 cik_cp_enable(rdev, false);
8518 cik_sdma_enable(rdev, false);
8519 if (rdev->has_uvd) {
8520 radeon_uvd_suspend(rdev);
8521 uvd_v1_0_fini(rdev);
8522 }
8523 if (rdev->has_vce)
8524 radeon_vce_suspend(rdev);
8525 cik_fini_pg(rdev);
8526 cik_fini_cg(rdev);
8527 cik_irq_suspend(rdev);
8528 radeon_wb_disable(rdev);
8529 cik_pcie_gart_disable(rdev);
8530 return 0;
8531}
8532
/* The plan is to move initialization into that function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic-specific function. This should also allow
 * removing a number of callback functions such as vram_info.
 */
8539/**
8540 * cik_init - asic specific driver and hw init
8541 *
8542 * @rdev: radeon_device pointer
8543 *
8544 * Setup asic specific driver variables and program the hw
8545 * to a functional state (CIK).
8546 * Called at driver startup.
8547 * Returns 0 for success, errors for failure.
8548 */
8549int cik_init(struct radeon_device *rdev)
8550{
8551 struct radeon_ring *ring;
8552 int r;
8553
8554 /* Read BIOS */
8555 if (!radeon_get_bios(rdev)) {
8556 if (ASIC_IS_AVIVO(rdev))
8557 return -EINVAL;
8558 }
8559 /* Must be an ATOMBIOS */
8560 if (!rdev->is_atom_bios) {
8561 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8562 return -EINVAL;
8563 }
8564 r = radeon_atombios_init(rdev);
8565 if (r)
8566 return r;
8567
8568 /* Post card if necessary */
8569 if (!radeon_card_posted(rdev)) {
8570 if (!rdev->bios) {
8571 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8572 return -EINVAL;
8573 }
8574 DRM_INFO("GPU not posted. posting now...\n");
8575 atom_asic_init(rdev->mode_info.atom_context);
8576 }
8577 /* init golden registers */
8578 cik_init_golden_registers(rdev);
8579 /* Initialize scratch registers */
8580 cik_scratch_init(rdev);
8581 /* Initialize surface registers */
8582 radeon_surface_init(rdev);
8583 /* Initialize clocks */
8584 radeon_get_clock_info(rdev_to_drm(rdev));
8585
8586 /* Fence driver */
8587 radeon_fence_driver_init(rdev);
8588
8589 /* initialize memory controller */
8590 r = cik_mc_init(rdev);
8591 if (r)
8592 return r;
8593 /* Memory manager */
8594 r = radeon_bo_init(rdev);
8595 if (r)
8596 return r;
8597
8598 if (rdev->flags & RADEON_IS_IGP) {
8599 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8600 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8601 r = cik_init_microcode(rdev);
8602 if (r) {
8603 DRM_ERROR("Failed to load firmware!\n");
8604 return r;
8605 }
8606 }
8607 } else {
8608 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8609 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8610 !rdev->mc_fw) {
8611 r = cik_init_microcode(rdev);
8612 if (r) {
8613 DRM_ERROR("Failed to load firmware!\n");
8614 return r;
8615 }
8616 }
8617 }
8618
8619 /* Initialize power management */
8620 radeon_pm_init(rdev);
8621
8622 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8623 ring->ring_obj = NULL;
8624 r600_ring_init(rdev, ring, 1024 * 1024);
8625
8626 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8627 ring->ring_obj = NULL;
8628 r600_ring_init(rdev, ring, 1024 * 1024);
8629 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8630 if (r)
8631 return r;
8632
8633 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8634 ring->ring_obj = NULL;
8635 r600_ring_init(rdev, ring, 1024 * 1024);
8636 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8637 if (r)
8638 return r;
8639
8640 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8641 ring->ring_obj = NULL;
8642 r600_ring_init(rdev, ring, 256 * 1024);
8643
8644 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8645 ring->ring_obj = NULL;
8646 r600_ring_init(rdev, ring, 256 * 1024);
8647
8648 cik_uvd_init(rdev);
8649 cik_vce_init(rdev);
8650
8651 rdev->ih.ring_obj = NULL;
8652 r600_ih_ring_init(rdev, 64 * 1024);
8653
8654 r = r600_pcie_gart_init(rdev);
8655 if (r)
8656 return r;
8657
8658 rdev->accel_working = true;
8659 r = cik_startup(rdev);
8660 if (r) {
8661 dev_err(rdev->dev, "disabling GPU acceleration\n");
8662 cik_cp_fini(rdev);
8663 cik_sdma_fini(rdev);
8664 cik_irq_fini(rdev);
8665 sumo_rlc_fini(rdev);
8666 cik_mec_fini(rdev);
8667 radeon_wb_fini(rdev);
8668 radeon_ib_pool_fini(rdev);
8669 radeon_vm_manager_fini(rdev);
8670 radeon_irq_kms_fini(rdev);
8671 cik_pcie_gart_fini(rdev);
8672 rdev->accel_working = false;
8673 }
8674
8675 /* Don't start up if the MC ucode is missing.
8676 * The default clocks and voltages before the MC ucode
8677 * is loaded are not suffient for advanced operations.
8678 */
8679 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8680 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8681 return -EINVAL;
8682 }
8683
8684 return 0;
8685}
8686
8687/**
8688 * cik_fini - asic specific driver and hw fini
8689 *
8690 * @rdev: radeon_device pointer
8691 *
8692 * Tear down the asic specific driver variables and program the hw
8693 * to an idle state (CIK).
8694 * Called at driver unload.
8695 */
8696void cik_fini(struct radeon_device *rdev)
8697{
8698 radeon_pm_fini(rdev);
8699 cik_cp_fini(rdev);
8700 cik_sdma_fini(rdev);
8701 cik_fini_pg(rdev);
8702 cik_fini_cg(rdev);
8703 cik_irq_fini(rdev);
8704 sumo_rlc_fini(rdev);
8705 cik_mec_fini(rdev);
8706 radeon_wb_fini(rdev);
8707 radeon_vm_manager_fini(rdev);
8708 radeon_ib_pool_fini(rdev);
8709 radeon_irq_kms_fini(rdev);
8710 uvd_v1_0_fini(rdev);
8711 radeon_uvd_fini(rdev);
8712 radeon_vce_fini(rdev);
8713 cik_pcie_gart_fini(rdev);
8714 r600_vram_scratch_fini(rdev);
8715 radeon_gem_fini(rdev);
8716 radeon_fence_driver_fini(rdev);
8717 radeon_bo_fini(rdev);
8718 radeon_atombios_fini(rdev);
8719 kfree(rdev->bios);
8720 rdev->bios = NULL;
8721}
8722
8723void dce8_program_fmt(struct drm_encoder *encoder)
8724{
8725 struct drm_device *dev = encoder->dev;
8726 struct radeon_device *rdev = dev->dev_private;
8727 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8728 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8729 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8730 int bpc = 0;
8731 u32 tmp = 0;
8732 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8733
8734 if (connector) {
8735 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8736 bpc = radeon_get_monitor_bpc(connector);
8737 dither = radeon_connector->dither;
8738 }
8739
8740 /* LVDS/eDP FMT is set up by atom */
8741 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8742 return;
8743
8744 /* not needed for analog */
8745 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8746 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8747 return;
8748
8749 if (bpc == 0)
8750 return;
8751
8752 switch (bpc) {
8753 case 6:
8754 if (dither == RADEON_FMT_DITHER_ENABLE)
8755 /* XXX sort out optimal dither settings */
8756 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8757 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8758 else
8759 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8760 break;
8761 case 8:
8762 if (dither == RADEON_FMT_DITHER_ENABLE)
8763 /* XXX sort out optimal dither settings */
8764 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8765 FMT_RGB_RANDOM_ENABLE |
8766 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8767 else
8768 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8769 break;
8770 case 10:
8771 if (dither == RADEON_FMT_DITHER_ENABLE)
8772 /* XXX sort out optimal dither settings */
8773 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8774 FMT_RGB_RANDOM_ENABLE |
8775 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8776 else
8777 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8778 break;
8779 default:
8780 /* not needed */
8781 break;
8782 }
8783
8784 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8785}
8786
8787/* display watermark setup */
8788/**
8789 * dce8_line_buffer_adjust - Set up the line buffer
8790 *
8791 * @rdev: radeon_device pointer
8792 * @radeon_crtc: the selected display controller
8793 * @mode: the current display mode on the selected display
8794 * controller
8795 *
8796 * Setup up the line buffer allocation for
8797 * the selected display controller (CIK).
8798 * Returns the line buffer size in pixels.
8799 */
8800static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8801 struct radeon_crtc *radeon_crtc,
8802 struct drm_display_mode *mode)
8803{
8804 u32 tmp, buffer_alloc, i;
8805 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8806 /*
8807 * Line Buffer Setup
8808 * There are 6 line buffers, one for each display controllers.
8809 * There are 3 partitions per LB. Select the number of partitions
8810 * to enable based on the display width. For display widths larger
8811 * than 4096, you need use to use 2 display controllers and combine
8812 * them using the stereo blender.
8813 */
8814 if (radeon_crtc->base.enabled && mode) {
8815 if (mode->crtc_hdisplay < 1920) {
8816 tmp = 1;
8817 buffer_alloc = 2;
8818 } else if (mode->crtc_hdisplay < 2560) {
8819 tmp = 2;
8820 buffer_alloc = 2;
8821 } else if (mode->crtc_hdisplay < 4096) {
8822 tmp = 0;
8823 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8824 } else {
8825 DRM_DEBUG_KMS("Mode too big for LB!\n");
8826 tmp = 0;
8827 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8828 }
8829 } else {
8830 tmp = 1;
8831 buffer_alloc = 0;
8832 }
8833
8834 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8835 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8836
8837 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8838 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8839 for (i = 0; i < rdev->usec_timeout; i++) {
8840 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8841 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8842 break;
8843 udelay(1);
8844 }
8845
8846 if (radeon_crtc->base.enabled && mode) {
8847 switch (tmp) {
8848 case 0:
8849 default:
8850 return 4096 * 2;
8851 case 1:
8852 return 1920 * 2;
8853 case 2:
8854 return 2560 * 2;
8855 }
8856 }
8857
8858 /* controller not enabled, so no lb used */
8859 return 0;
8860}
8861
8862/**
8863 * cik_get_number_of_dram_channels - get the number of dram channels
8864 *
8865 * @rdev: radeon_device pointer
8866 *
8867 * Look up the number of video ram channels (CIK).
8868 * Used for display watermark bandwidth calculations
8869 * Returns the number of dram channels
8870 */
8871static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8872{
8873 u32 tmp = RREG32(MC_SHARED_CHMAP);
8874
8875 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8876 case 0:
8877 default:
8878 return 1;
8879 case 1:
8880 return 2;
8881 case 2:
8882 return 4;
8883 case 3:
8884 return 8;
8885 case 4:
8886 return 3;
8887 case 5:
8888 return 6;
8889 case 6:
8890 return 10;
8891 case 7:
8892 return 12;
8893 case 8:
8894 return 16;
8895 }
8896}
8897
8898struct dce8_wm_params {
8899 u32 dram_channels; /* number of dram channels */
8900 u32 yclk; /* bandwidth per dram data pin in kHz */
8901 u32 sclk; /* engine clock in kHz */
8902 u32 disp_clk; /* display clock in kHz */
8903 u32 src_width; /* viewport width */
8904 u32 active_time; /* active display time in ns */
8905 u32 blank_time; /* blank time in ns */
8906 bool interlaced; /* mode is interlaced */
8907 fixed20_12 vsc; /* vertical scale ratio */
8908 u32 num_heads; /* number of active crtcs */
8909 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8910 u32 lb_size; /* line buffer allocated to pipe */
8911 u32 vtaps; /* vertical scaler taps */
8912};
8913
8914/**
8915 * dce8_dram_bandwidth - get the dram bandwidth
8916 *
8917 * @wm: watermark calculation data
8918 *
8919 * Calculate the raw dram bandwidth (CIK).
8920 * Used for display watermark bandwidth calculations
8921 * Returns the dram bandwidth in MBytes/s
8922 */
8923static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8924{
8925 /* Calculate raw DRAM Bandwidth */
8926 fixed20_12 dram_efficiency; /* 0.7 */
8927 fixed20_12 yclk, dram_channels, bandwidth;
8928 fixed20_12 a;
8929
8930 a.full = dfixed_const(1000);
8931 yclk.full = dfixed_const(wm->yclk);
8932 yclk.full = dfixed_div(yclk, a);
8933 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8934 a.full = dfixed_const(10);
8935 dram_efficiency.full = dfixed_const(7);
8936 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8937 bandwidth.full = dfixed_mul(dram_channels, yclk);
8938 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8939
8940 return dfixed_trunc(bandwidth);
8941}
8942
8943/**
8944 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8945 *
8946 * @wm: watermark calculation data
8947 *
8948 * Calculate the dram bandwidth used for display (CIK).
8949 * Used for display watermark bandwidth calculations
8950 * Returns the dram bandwidth for display in MBytes/s
8951 */
8952static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8953{
8954 /* Calculate DRAM Bandwidth and the part allocated to display. */
8955 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8956 fixed20_12 yclk, dram_channels, bandwidth;
8957 fixed20_12 a;
8958
8959 a.full = dfixed_const(1000);
8960 yclk.full = dfixed_const(wm->yclk);
8961 yclk.full = dfixed_div(yclk, a);
8962 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8963 a.full = dfixed_const(10);
8964 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8965 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8966 bandwidth.full = dfixed_mul(dram_channels, yclk);
8967 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8968
8969 return dfixed_trunc(bandwidth);
8970}
8971
8972/**
8973 * dce8_data_return_bandwidth - get the data return bandwidth
8974 *
8975 * @wm: watermark calculation data
8976 *
8977 * Calculate the data return bandwidth used for display (CIK).
8978 * Used for display watermark bandwidth calculations
8979 * Returns the data return bandwidth in MBytes/s
8980 */
8981static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8982{
8983 /* Calculate the display Data return Bandwidth */
8984 fixed20_12 return_efficiency; /* 0.8 */
8985 fixed20_12 sclk, bandwidth;
8986 fixed20_12 a;
8987
8988 a.full = dfixed_const(1000);
8989 sclk.full = dfixed_const(wm->sclk);
8990 sclk.full = dfixed_div(sclk, a);
8991 a.full = dfixed_const(10);
8992 return_efficiency.full = dfixed_const(8);
8993 return_efficiency.full = dfixed_div(return_efficiency, a);
8994 a.full = dfixed_const(32);
8995 bandwidth.full = dfixed_mul(a, sclk);
8996 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8997
8998 return dfixed_trunc(bandwidth);
8999}
9000
9001/**
9002 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9003 *
9004 * @wm: watermark calculation data
9005 *
9006 * Calculate the dmif bandwidth used for display (CIK).
9007 * Used for display watermark bandwidth calculations
9008 * Returns the dmif bandwidth in MBytes/s
9009 */
9010static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9011{
9012 /* Calculate the DMIF Request Bandwidth */
9013 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9014 fixed20_12 disp_clk, bandwidth;
9015 fixed20_12 a, b;
9016
9017 a.full = dfixed_const(1000);
9018 disp_clk.full = dfixed_const(wm->disp_clk);
9019 disp_clk.full = dfixed_div(disp_clk, a);
9020 a.full = dfixed_const(32);
9021 b.full = dfixed_mul(a, disp_clk);
9022
9023 a.full = dfixed_const(10);
9024 disp_clk_request_efficiency.full = dfixed_const(8);
9025 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9026
9027 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9028
9029 return dfixed_trunc(bandwidth);
9030}
9031
9032/**
9033 * dce8_available_bandwidth - get the min available bandwidth
9034 *
9035 * @wm: watermark calculation data
9036 *
9037 * Calculate the min available bandwidth used for display (CIK).
9038 * Used for display watermark bandwidth calculations
9039 * Returns the min available bandwidth in MBytes/s
9040 */
9041static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9042{
9043 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9044 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9045 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9046 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9047
9048 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9049}
9050
9051/**
9052 * dce8_average_bandwidth - get the average available bandwidth
9053 *
9054 * @wm: watermark calculation data
9055 *
9056 * Calculate the average available bandwidth used for display (CIK).
9057 * Used for display watermark bandwidth calculations
9058 * Returns the average available bandwidth in MBytes/s
9059 */
9060static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9061{
9062 /* Calculate the display mode Average Bandwidth
9063 * DisplayMode should contain the source and destination dimensions,
9064 * timing, etc.
9065 */
9066 fixed20_12 bpp;
9067 fixed20_12 line_time;
9068 fixed20_12 src_width;
9069 fixed20_12 bandwidth;
9070 fixed20_12 a;
9071
9072 a.full = dfixed_const(1000);
9073 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9074 line_time.full = dfixed_div(line_time, a);
9075 bpp.full = dfixed_const(wm->bytes_per_pixel);
9076 src_width.full = dfixed_const(wm->src_width);
9077 bandwidth.full = dfixed_mul(src_width, bpp);
9078 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9079 bandwidth.full = dfixed_div(bandwidth, line_time);
9080
9081 return dfixed_trunc(bandwidth);
9082}
9083
9084/**
9085 * dce8_latency_watermark - get the latency watermark
9086 *
9087 * @wm: watermark calculation data
9088 *
9089 * Calculate the latency watermark (CIK).
9090 * Used for display watermark bandwidth calculations
9091 * Returns the latency watermark in ns
9092 */
9093static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9094{
9095 /* First calculate the latency in ns */
9096 u32 mc_latency = 2000; /* 2000 ns. */
9097 u32 available_bandwidth = dce8_available_bandwidth(wm);
9098 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9099 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9100 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9101 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9102 (wm->num_heads * cursor_line_pair_return_time);
9103 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9104 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9105 u32 tmp, dmif_size = 12288;
9106 fixed20_12 a, b, c;
9107
9108 if (wm->num_heads == 0)
9109 return 0;
9110
9111 a.full = dfixed_const(2);
9112 b.full = dfixed_const(1);
9113 if ((wm->vsc.full > a.full) ||
9114 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9115 (wm->vtaps >= 5) ||
9116 ((wm->vsc.full >= a.full) && wm->interlaced))
9117 max_src_lines_per_dst_line = 4;
9118 else
9119 max_src_lines_per_dst_line = 2;
9120
9121 a.full = dfixed_const(available_bandwidth);
9122 b.full = dfixed_const(wm->num_heads);
9123 a.full = dfixed_div(a, b);
9124 tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9125 tmp = min(dfixed_trunc(a), tmp);
9126
9127 lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9128
9129 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9130 b.full = dfixed_const(1000);
9131 c.full = dfixed_const(lb_fill_bw);
9132 b.full = dfixed_div(c, b);
9133 a.full = dfixed_div(a, b);
9134 line_fill_time = dfixed_trunc(a);
9135
9136 if (line_fill_time < wm->active_time)
9137 return latency;
9138 else
9139 return latency + (line_fill_time - wm->active_time);
9140
9141}
9142
9143/**
9144 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9145 * average and available dram bandwidth
9146 *
9147 * @wm: watermark calculation data
9148 *
9149 * Check if the display average bandwidth fits in the display
9150 * dram bandwidth (CIK).
9151 * Used for display watermark bandwidth calculations
9152 * Returns true if the display fits, false if not.
9153 */
9154static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9155{
9156 if (dce8_average_bandwidth(wm) <=
9157 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9158 return true;
9159 else
9160 return false;
9161}
9162
9163/**
9164 * dce8_average_bandwidth_vs_available_bandwidth - check
9165 * average and available bandwidth
9166 *
9167 * @wm: watermark calculation data
9168 *
9169 * Check if the display average bandwidth fits in the display
9170 * available bandwidth (CIK).
9171 * Used for display watermark bandwidth calculations
9172 * Returns true if the display fits, false if not.
9173 */
9174static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9175{
9176 if (dce8_average_bandwidth(wm) <=
9177 (dce8_available_bandwidth(wm) / wm->num_heads))
9178 return true;
9179 else
9180 return false;
9181}
9182
9183/**
9184 * dce8_check_latency_hiding - check latency hiding
9185 *
9186 * @wm: watermark calculation data
9187 *
9188 * Check latency hiding (CIK).
9189 * Used for display watermark bandwidth calculations
9190 * Returns true if the display fits, false if not.
9191 */
9192static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9193{
9194 u32 lb_partitions = wm->lb_size / wm->src_width;
9195 u32 line_time = wm->active_time + wm->blank_time;
9196 u32 latency_tolerant_lines;
9197 u32 latency_hiding;
9198 fixed20_12 a;
9199
9200 a.full = dfixed_const(1);
9201 if (wm->vsc.full > a.full)
9202 latency_tolerant_lines = 1;
9203 else {
9204 if (lb_partitions <= (wm->vtaps + 1))
9205 latency_tolerant_lines = 1;
9206 else
9207 latency_tolerant_lines = 2;
9208 }
9209
9210 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9211
9212 if (dce8_latency_watermark(wm) <= latency_hiding)
9213 return true;
9214 else
9215 return false;
9216}
9217
9218/**
9219 * dce8_program_watermarks - program display watermarks
9220 *
9221 * @rdev: radeon_device pointer
9222 * @radeon_crtc: the selected display controller
9223 * @lb_size: line buffer size
9224 * @num_heads: number of display controllers in use
9225 *
9226 * Calculate and program the display watermarks for the
9227 * selected display controller (CIK).
9228 */
9229static void dce8_program_watermarks(struct radeon_device *rdev,
9230 struct radeon_crtc *radeon_crtc,
9231 u32 lb_size, u32 num_heads)
9232{
9233 struct drm_display_mode *mode = &radeon_crtc->base.mode;
9234 struct dce8_wm_params wm_low, wm_high;
9235 u32 active_time;
9236 u32 line_time = 0;
9237 u32 latency_watermark_a = 0, latency_watermark_b = 0;
9238 u32 tmp, wm_mask;
9239
9240 if (radeon_crtc->base.enabled && num_heads && mode) {
9241 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9242 (u32)mode->clock);
9243 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9244 (u32)mode->clock);
9245 line_time = min(line_time, (u32)65535);
9246
9247 /* watermark for high clocks */
9248 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9249 rdev->pm.dpm_enabled) {
9250 wm_high.yclk =
9251 radeon_dpm_get_mclk(rdev, false) * 10;
9252 wm_high.sclk =
9253 radeon_dpm_get_sclk(rdev, false) * 10;
9254 } else {
9255 wm_high.yclk = rdev->pm.current_mclk * 10;
9256 wm_high.sclk = rdev->pm.current_sclk * 10;
9257 }
9258
9259 wm_high.disp_clk = mode->clock;
9260 wm_high.src_width = mode->crtc_hdisplay;
9261 wm_high.active_time = active_time;
9262 wm_high.blank_time = line_time - wm_high.active_time;
9263 wm_high.interlaced = false;
9264 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9265 wm_high.interlaced = true;
9266 wm_high.vsc = radeon_crtc->vsc;
9267 wm_high.vtaps = 1;
9268 if (radeon_crtc->rmx_type != RMX_OFF)
9269 wm_high.vtaps = 2;
9270 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9271 wm_high.lb_size = lb_size;
9272 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9273 wm_high.num_heads = num_heads;
9274
9275 /* set for high clocks */
9276 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9277
9278 /* possibly force display priority to high */
9279 /* should really do this at mode validation time... */
9280 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9281 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9282 !dce8_check_latency_hiding(&wm_high) ||
9283 (rdev->disp_priority == 2)) {
9284 DRM_DEBUG_KMS("force priority to high\n");
9285 }
9286
9287 /* watermark for low clocks */
9288 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9289 rdev->pm.dpm_enabled) {
9290 wm_low.yclk =
9291 radeon_dpm_get_mclk(rdev, true) * 10;
9292 wm_low.sclk =
9293 radeon_dpm_get_sclk(rdev, true) * 10;
9294 } else {
9295 wm_low.yclk = rdev->pm.current_mclk * 10;
9296 wm_low.sclk = rdev->pm.current_sclk * 10;
9297 }
9298
9299 wm_low.disp_clk = mode->clock;
9300 wm_low.src_width = mode->crtc_hdisplay;
9301 wm_low.active_time = active_time;
9302 wm_low.blank_time = line_time - wm_low.active_time;
9303 wm_low.interlaced = false;
9304 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9305 wm_low.interlaced = true;
9306 wm_low.vsc = radeon_crtc->vsc;
9307 wm_low.vtaps = 1;
9308 if (radeon_crtc->rmx_type != RMX_OFF)
9309 wm_low.vtaps = 2;
9310 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9311 wm_low.lb_size = lb_size;
9312 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9313 wm_low.num_heads = num_heads;
9314
9315 /* set for low clocks */
9316 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9317
9318 /* possibly force display priority to high */
9319 /* should really do this at mode validation time... */
9320 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9321 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9322 !dce8_check_latency_hiding(&wm_low) ||
9323 (rdev->disp_priority == 2)) {
9324 DRM_DEBUG_KMS("force priority to high\n");
9325 }
9326
9327 /* Save number of lines the linebuffer leads before the scanout */
9328 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9329 }
9330
9331 /* select wm A */
9332 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9333 tmp = wm_mask;
9334 tmp &= ~LATENCY_WATERMARK_MASK(3);
9335 tmp |= LATENCY_WATERMARK_MASK(1);
9336 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9337 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9338 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9339 LATENCY_HIGH_WATERMARK(line_time)));
9340 /* select wm B */
9341 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9342 tmp &= ~LATENCY_WATERMARK_MASK(3);
9343 tmp |= LATENCY_WATERMARK_MASK(2);
9344 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9345 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9346 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9347 LATENCY_HIGH_WATERMARK(line_time)));
9348 /* restore original selection */
9349 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9350
9351 /* save values for DPM */
9352 radeon_crtc->line_time = line_time;
9353 radeon_crtc->wm_high = latency_watermark_a;
9354 radeon_crtc->wm_low = latency_watermark_b;
9355}
9356
9357/**
9358 * dce8_bandwidth_update - program display watermarks
9359 *
9360 * @rdev: radeon_device pointer
9361 *
9362 * Calculate and program the display watermarks and line
9363 * buffer allocation (CIK).
9364 */
9365void dce8_bandwidth_update(struct radeon_device *rdev)
9366{
9367 struct drm_display_mode *mode = NULL;
9368 u32 num_heads = 0, lb_size;
9369 int i;
9370
9371 if (!rdev->mode_info.mode_config_initialized)
9372 return;
9373
9374 radeon_update_display_priority(rdev);
9375
9376 for (i = 0; i < rdev->num_crtc; i++) {
9377 if (rdev->mode_info.crtcs[i]->base.enabled)
9378 num_heads++;
9379 }
9380 for (i = 0; i < rdev->num_crtc; i++) {
9381 mode = &rdev->mode_info.crtcs[i]->base.mode;
9382 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9383 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9384 }
9385}
9386
9387/**
9388 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9389 *
9390 * @rdev: radeon_device pointer
9391 *
9392 * Fetches a GPU clock counter snapshot (SI).
9393 * Returns the 64 bit clock counter snapshot.
9394 */
9395uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9396{
9397 uint64_t clock;
9398
9399 mutex_lock(&rdev->gpu_clock_mutex);
9400 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9401 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9402 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9403 mutex_unlock(&rdev->gpu_clock_mutex);
9404 return clock;
9405}
9406
9407static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9408 u32 cntl_reg, u32 status_reg)
9409{
9410 int r, i;
9411 struct atom_clock_dividers dividers;
9412 uint32_t tmp;
9413
9414 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9415 clock, false, ÷rs);
9416 if (r)
9417 return r;
9418
9419 tmp = RREG32_SMC(cntl_reg);
9420 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9421 tmp |= dividers.post_divider;
9422 WREG32_SMC(cntl_reg, tmp);
9423
9424 for (i = 0; i < 100; i++) {
9425 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9426 break;
9427 mdelay(10);
9428 }
9429 if (i == 100)
9430 return -ETIMEDOUT;
9431
9432 return 0;
9433}
9434
9435int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9436{
9437 int r = 0;
9438
9439 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9440 if (r)
9441 return r;
9442
9443 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9444 return r;
9445}
9446
9447int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9448{
9449 int r, i;
9450 struct atom_clock_dividers dividers;
9451 u32 tmp;
9452
9453 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9454 ecclk, false, ÷rs);
9455 if (r)
9456 return r;
9457
9458 for (i = 0; i < 100; i++) {
9459 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9460 break;
9461 mdelay(10);
9462 }
9463 if (i == 100)
9464 return -ETIMEDOUT;
9465
9466 tmp = RREG32_SMC(CG_ECLK_CNTL);
9467 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9468 tmp |= dividers.post_divider;
9469 WREG32_SMC(CG_ECLK_CNTL, tmp);
9470
9471 for (i = 0; i < 100; i++) {
9472 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9473 break;
9474 mdelay(10);
9475 }
9476 if (i == 100)
9477 return -ETIMEDOUT;
9478
9479 return 0;
9480}
9481
9482static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9483{
9484 struct pci_dev *root = rdev->pdev->bus->self;
9485 enum pci_bus_speed speed_cap;
9486 u32 speed_cntl, current_data_rate;
9487 int i;
9488 u16 tmp16;
9489
9490 if (pci_is_root_bus(rdev->pdev->bus))
9491 return;
9492
9493 if (radeon_pcie_gen2 == 0)
9494 return;
9495
9496 if (rdev->flags & RADEON_IS_IGP)
9497 return;
9498
9499 if (!(rdev->flags & RADEON_IS_PCIE))
9500 return;
9501
9502 speed_cap = pcie_get_speed_cap(root);
9503 if (speed_cap == PCI_SPEED_UNKNOWN)
9504 return;
9505
9506 if ((speed_cap != PCIE_SPEED_8_0GT) &&
9507 (speed_cap != PCIE_SPEED_5_0GT))
9508 return;
9509
9510 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9511 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9512 LC_CURRENT_DATA_RATE_SHIFT;
9513 if (speed_cap == PCIE_SPEED_8_0GT) {
9514 if (current_data_rate == 2) {
9515 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9516 return;
9517 }
9518 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9519 } else if (speed_cap == PCIE_SPEED_5_0GT) {
9520 if (current_data_rate == 1) {
9521 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9522 return;
9523 }
9524 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9525 }
9526
9527 if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
9528 return;
9529
9530 if (speed_cap == PCIE_SPEED_8_0GT) {
9531 /* re-try equalization if gen3 is not already enabled */
9532 if (current_data_rate != 2) {
9533 u16 bridge_cfg, gpu_cfg;
9534 u16 bridge_cfg2, gpu_cfg2;
9535 u32 max_lw, current_lw, tmp;
9536
9537 pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9538 pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
9539
9540 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9541 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9542 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9543
9544 if (current_lw < max_lw) {
9545 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9546 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9547 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9548 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9549 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9550 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9551 }
9552 }
9553
9554 for (i = 0; i < 10; i++) {
9555 /* check status */
9556 pcie_capability_read_word(rdev->pdev,
9557 PCI_EXP_DEVSTA,
9558 &tmp16);
9559 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9560 break;
9561
9562 pcie_capability_read_word(root, PCI_EXP_LNKCTL,
9563 &bridge_cfg);
9564 pcie_capability_read_word(rdev->pdev,
9565 PCI_EXP_LNKCTL,
9566 &gpu_cfg);
9567
9568 pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
9569 &bridge_cfg2);
9570 pcie_capability_read_word(rdev->pdev,
9571 PCI_EXP_LNKCTL2,
9572 &gpu_cfg2);
9573
9574 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9575 tmp |= LC_SET_QUIESCE;
9576 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9577
9578 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9579 tmp |= LC_REDO_EQ;
9580 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9581
9582 msleep(100);
9583
9584 /* linkctl */
9585 pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
9586 PCI_EXP_LNKCTL_HAWD,
9587 bridge_cfg &
9588 PCI_EXP_LNKCTL_HAWD);
9589 pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
9590 PCI_EXP_LNKCTL_HAWD,
9591 gpu_cfg &
9592 PCI_EXP_LNKCTL_HAWD);
9593
9594 /* linkctl2 */
9595 pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
9596 PCI_EXP_LNKCTL2_ENTER_COMP |
9597 PCI_EXP_LNKCTL2_TX_MARGIN,
9598 bridge_cfg2 |
9599 (PCI_EXP_LNKCTL2_ENTER_COMP |
9600 PCI_EXP_LNKCTL2_TX_MARGIN));
9601 pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
9602 PCI_EXP_LNKCTL2_ENTER_COMP |
9603 PCI_EXP_LNKCTL2_TX_MARGIN,
9604 gpu_cfg2 |
9605 (PCI_EXP_LNKCTL2_ENTER_COMP |
9606 PCI_EXP_LNKCTL2_TX_MARGIN));
9607
9608 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9609 tmp &= ~LC_SET_QUIESCE;
9610 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9611 }
9612 }
9613 }
9614
9615 /* set the link speed */
9616 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9617 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9618 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9619
9620 tmp16 = 0;
9621 if (speed_cap == PCIE_SPEED_8_0GT)
9622 tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
9623 else if (speed_cap == PCIE_SPEED_5_0GT)
9624 tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
9625 else
9626 tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
9627 pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
9628 PCI_EXP_LNKCTL2_TLS, tmp16);
9629
9630 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9631 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9632 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9633
9634 for (i = 0; i < rdev->usec_timeout; i++) {
9635 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9636 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9637 break;
9638 udelay(1);
9639 }
9640}
9641
9642static void cik_program_aspm(struct radeon_device *rdev)
9643{
9644 u32 data, orig;
9645 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9646 bool disable_clkreq = false;
9647
9648 if (radeon_aspm == 0)
9649 return;
9650
9651 /* XXX double check IGPs */
9652 if (rdev->flags & RADEON_IS_IGP)
9653 return;
9654
9655 if (!(rdev->flags & RADEON_IS_PCIE))
9656 return;
9657
9658 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9659 data &= ~LC_XMIT_N_FTS_MASK;
9660 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9661 if (orig != data)
9662 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9663
9664 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9665 data |= LC_GO_TO_RECOVERY;
9666 if (orig != data)
9667 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9668
9669 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9670 data |= P_IGNORE_EDB_ERR;
9671 if (orig != data)
9672 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9673
9674 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9675 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9676 data |= LC_PMI_TO_L1_DIS;
9677 if (!disable_l0s)
9678 data |= LC_L0S_INACTIVITY(7);
9679
9680 if (!disable_l1) {
9681 data |= LC_L1_INACTIVITY(7);
9682 data &= ~LC_PMI_TO_L1_DIS;
9683 if (orig != data)
9684 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9685
9686 if (!disable_plloff_in_l1) {
9687 bool clk_req_support;
9688
9689 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9690 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9691 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9692 if (orig != data)
9693 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9694
9695 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9696 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9697 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9698 if (orig != data)
9699 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9700
9701 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9702 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9703 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9704 if (orig != data)
9705 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9706
9707 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9708 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9709 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9710 if (orig != data)
9711 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9712
9713 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9714 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9715 data |= LC_DYN_LANES_PWR_STATE(3);
9716 if (orig != data)
9717 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9718
9719 if (!disable_clkreq &&
9720 !pci_is_root_bus(rdev->pdev->bus)) {
9721 struct pci_dev *root = rdev->pdev->bus->self;
9722 u32 lnkcap;
9723
9724 clk_req_support = false;
9725 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9726 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9727 clk_req_support = true;
9728 } else {
9729 clk_req_support = false;
9730 }
9731
9732 if (clk_req_support) {
9733 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9734 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9735 if (orig != data)
9736 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9737
9738 orig = data = RREG32_SMC(THM_CLK_CNTL);
9739 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9740 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9741 if (orig != data)
9742 WREG32_SMC(THM_CLK_CNTL, data);
9743
9744 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9745 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9746 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9747 if (orig != data)
9748 WREG32_SMC(MISC_CLK_CTRL, data);
9749
9750 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9751 data &= ~BCLK_AS_XCLK;
9752 if (orig != data)
9753 WREG32_SMC(CG_CLKPIN_CNTL, data);
9754
9755 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9756 data &= ~FORCE_BIF_REFCLK_EN;
9757 if (orig != data)
9758 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9759
9760 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9761 data &= ~MPLL_CLKOUT_SEL_MASK;
9762 data |= MPLL_CLKOUT_SEL(4);
9763 if (orig != data)
9764 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9765 }
9766 }
9767 } else {
9768 if (orig != data)
9769 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9770 }
9771
9772 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9773 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9774 if (orig != data)
9775 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9776
9777 if (!disable_l0s) {
9778 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9779 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9780 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9781 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9782 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9783 data &= ~LC_L0S_INACTIVITY_MASK;
9784 if (orig != data)
9785 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9786 }
9787 }
9788 }
9789}
1/*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24
25#include <linux/firmware.h>
26#include <linux/slab.h>
27#include <linux/module.h>
28
29#include <drm/drm_pci.h>
30#include <drm/drm_vblank.h>
31
32#include "atom.h"
33#include "cik_blit_shaders.h"
34#include "cikd.h"
35#include "clearstate_ci.h"
36#include "radeon.h"
37#include "radeon_asic.h"
38#include "radeon_audio.h"
39#include "radeon_ucode.h"
40
41#define SH_MEM_CONFIG_GFX_DEFAULT \
42 ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
43
44MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
45MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
46MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
47MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
48MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
49MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
50MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
51MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
52MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
53
54MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
55MODULE_FIRMWARE("radeon/bonaire_me.bin");
56MODULE_FIRMWARE("radeon/bonaire_ce.bin");
57MODULE_FIRMWARE("radeon/bonaire_mec.bin");
58MODULE_FIRMWARE("radeon/bonaire_mc.bin");
59MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
60MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
61MODULE_FIRMWARE("radeon/bonaire_smc.bin");
62MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
63
64MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
65MODULE_FIRMWARE("radeon/HAWAII_me.bin");
66MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
67MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
68MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
69MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
70MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
71MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
72MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
73
74MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
75MODULE_FIRMWARE("radeon/hawaii_me.bin");
76MODULE_FIRMWARE("radeon/hawaii_ce.bin");
77MODULE_FIRMWARE("radeon/hawaii_mec.bin");
78MODULE_FIRMWARE("radeon/hawaii_mc.bin");
79MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
80MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
81MODULE_FIRMWARE("radeon/hawaii_smc.bin");
82MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
83
84MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
85MODULE_FIRMWARE("radeon/KAVERI_me.bin");
86MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
87MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
88MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
89MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
90
91MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
92MODULE_FIRMWARE("radeon/kaveri_me.bin");
93MODULE_FIRMWARE("radeon/kaveri_ce.bin");
94MODULE_FIRMWARE("radeon/kaveri_mec.bin");
95MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
96MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
97MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
98
99MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
100MODULE_FIRMWARE("radeon/KABINI_me.bin");
101MODULE_FIRMWARE("radeon/KABINI_ce.bin");
102MODULE_FIRMWARE("radeon/KABINI_mec.bin");
103MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
104MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
105
106MODULE_FIRMWARE("radeon/kabini_pfp.bin");
107MODULE_FIRMWARE("radeon/kabini_me.bin");
108MODULE_FIRMWARE("radeon/kabini_ce.bin");
109MODULE_FIRMWARE("radeon/kabini_mec.bin");
110MODULE_FIRMWARE("radeon/kabini_rlc.bin");
111MODULE_FIRMWARE("radeon/kabini_sdma.bin");
112
113MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
114MODULE_FIRMWARE("radeon/MULLINS_me.bin");
115MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
116MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
117MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
118MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
119
120MODULE_FIRMWARE("radeon/mullins_pfp.bin");
121MODULE_FIRMWARE("radeon/mullins_me.bin");
122MODULE_FIRMWARE("radeon/mullins_ce.bin");
123MODULE_FIRMWARE("radeon/mullins_mec.bin");
124MODULE_FIRMWARE("radeon/mullins_rlc.bin");
125MODULE_FIRMWARE("radeon/mullins_sdma.bin");
126
127extern int r600_ih_ring_alloc(struct radeon_device *rdev);
128extern void r600_ih_ring_fini(struct radeon_device *rdev);
129extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
130extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
131extern bool evergreen_is_display_hung(struct radeon_device *rdev);
132extern void sumo_rlc_fini(struct radeon_device *rdev);
133extern int sumo_rlc_init(struct radeon_device *rdev);
134extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
135extern void si_rlc_reset(struct radeon_device *rdev);
136extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
137static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
138extern int cik_sdma_resume(struct radeon_device *rdev);
139extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
140extern void cik_sdma_fini(struct radeon_device *rdev);
141extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
142static void cik_rlc_stop(struct radeon_device *rdev);
143static void cik_pcie_gen3_enable(struct radeon_device *rdev);
144static void cik_program_aspm(struct radeon_device *rdev);
145static void cik_init_pg(struct radeon_device *rdev);
146static void cik_init_cg(struct radeon_device *rdev);
147static void cik_fini_pg(struct radeon_device *rdev);
148static void cik_fini_cg(struct radeon_device *rdev);
149static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
150 bool enable);
151
152/**
153 * cik_get_allowed_info_register - fetch the register for the info ioctl
154 *
155 * @rdev: radeon_device pointer
156 * @reg: register offset in bytes
157 * @val: register value
158 *
159 * Returns 0 for success or -EINVAL for an invalid register
160 *
161 */
162int cik_get_allowed_info_register(struct radeon_device *rdev,
163 u32 reg, u32 *val)
164{
165 switch (reg) {
166 case GRBM_STATUS:
167 case GRBM_STATUS2:
168 case GRBM_STATUS_SE0:
169 case GRBM_STATUS_SE1:
170 case GRBM_STATUS_SE2:
171 case GRBM_STATUS_SE3:
172 case SRBM_STATUS:
173 case SRBM_STATUS2:
174 case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
175 case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
176 case UVD_STATUS:
177 /* TODO VCE */
178 *val = RREG32(reg);
179 return 0;
180 default:
181 return -EINVAL;
182 }
183}
184
185/*
186 * Indirect registers accessor
187 */
188u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
189{
190 unsigned long flags;
191 u32 r;
192
193 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
194 WREG32(CIK_DIDT_IND_INDEX, (reg));
195 r = RREG32(CIK_DIDT_IND_DATA);
196 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
197 return r;
198}
199
200void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
201{
202 unsigned long flags;
203
204 spin_lock_irqsave(&rdev->didt_idx_lock, flags);
205 WREG32(CIK_DIDT_IND_INDEX, (reg));
206 WREG32(CIK_DIDT_IND_DATA, (v));
207 spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
208}
209
210/* get temperature in millidegrees */
211int ci_get_temp(struct radeon_device *rdev)
212{
213 u32 temp;
214 int actual_temp = 0;
215
216 temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
217 CTF_TEMP_SHIFT;
218
219 if (temp & 0x200)
220 actual_temp = 255;
221 else
222 actual_temp = temp & 0x1ff;
223
224 actual_temp = actual_temp * 1000;
225
226 return actual_temp;
227}
228
229/* get temperature in millidegrees */
230int kv_get_temp(struct radeon_device *rdev)
231{
232 u32 temp;
233 int actual_temp = 0;
234
235 temp = RREG32_SMC(0xC0300E0C);
236
237 if (temp)
238 actual_temp = (temp / 8) - 49;
239 else
240 actual_temp = 0;
241
242 actual_temp = actual_temp * 1000;
243
244 return actual_temp;
245}
246
247/*
248 * Indirect registers accessor
249 */
250u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
251{
252 unsigned long flags;
253 u32 r;
254
255 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
256 WREG32(PCIE_INDEX, reg);
257 (void)RREG32(PCIE_INDEX);
258 r = RREG32(PCIE_DATA);
259 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
260 return r;
261}
262
263void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
264{
265 unsigned long flags;
266
267 spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
268 WREG32(PCIE_INDEX, reg);
269 (void)RREG32(PCIE_INDEX);
270 WREG32(PCIE_DATA, v);
271 (void)RREG32(PCIE_DATA);
272 spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
273}
274
275static const u32 spectre_rlc_save_restore_register_list[] =
276{
277 (0x0e00 << 16) | (0xc12c >> 2),
278 0x00000000,
279 (0x0e00 << 16) | (0xc140 >> 2),
280 0x00000000,
281 (0x0e00 << 16) | (0xc150 >> 2),
282 0x00000000,
283 (0x0e00 << 16) | (0xc15c >> 2),
284 0x00000000,
285 (0x0e00 << 16) | (0xc168 >> 2),
286 0x00000000,
287 (0x0e00 << 16) | (0xc170 >> 2),
288 0x00000000,
289 (0x0e00 << 16) | (0xc178 >> 2),
290 0x00000000,
291 (0x0e00 << 16) | (0xc204 >> 2),
292 0x00000000,
293 (0x0e00 << 16) | (0xc2b4 >> 2),
294 0x00000000,
295 (0x0e00 << 16) | (0xc2b8 >> 2),
296 0x00000000,
297 (0x0e00 << 16) | (0xc2bc >> 2),
298 0x00000000,
299 (0x0e00 << 16) | (0xc2c0 >> 2),
300 0x00000000,
301 (0x0e00 << 16) | (0x8228 >> 2),
302 0x00000000,
303 (0x0e00 << 16) | (0x829c >> 2),
304 0x00000000,
305 (0x0e00 << 16) | (0x869c >> 2),
306 0x00000000,
307 (0x0600 << 16) | (0x98f4 >> 2),
308 0x00000000,
309 (0x0e00 << 16) | (0x98f8 >> 2),
310 0x00000000,
311 (0x0e00 << 16) | (0x9900 >> 2),
312 0x00000000,
313 (0x0e00 << 16) | (0xc260 >> 2),
314 0x00000000,
315 (0x0e00 << 16) | (0x90e8 >> 2),
316 0x00000000,
317 (0x0e00 << 16) | (0x3c000 >> 2),
318 0x00000000,
319 (0x0e00 << 16) | (0x3c00c >> 2),
320 0x00000000,
321 (0x0e00 << 16) | (0x8c1c >> 2),
322 0x00000000,
323 (0x0e00 << 16) | (0x9700 >> 2),
324 0x00000000,
325 (0x0e00 << 16) | (0xcd20 >> 2),
326 0x00000000,
327 (0x4e00 << 16) | (0xcd20 >> 2),
328 0x00000000,
329 (0x5e00 << 16) | (0xcd20 >> 2),
330 0x00000000,
331 (0x6e00 << 16) | (0xcd20 >> 2),
332 0x00000000,
333 (0x7e00 << 16) | (0xcd20 >> 2),
334 0x00000000,
335 (0x8e00 << 16) | (0xcd20 >> 2),
336 0x00000000,
337 (0x9e00 << 16) | (0xcd20 >> 2),
338 0x00000000,
339 (0xae00 << 16) | (0xcd20 >> 2),
340 0x00000000,
341 (0xbe00 << 16) | (0xcd20 >> 2),
342 0x00000000,
343 (0x0e00 << 16) | (0x89bc >> 2),
344 0x00000000,
345 (0x0e00 << 16) | (0x8900 >> 2),
346 0x00000000,
347 0x3,
348 (0x0e00 << 16) | (0xc130 >> 2),
349 0x00000000,
350 (0x0e00 << 16) | (0xc134 >> 2),
351 0x00000000,
352 (0x0e00 << 16) | (0xc1fc >> 2),
353 0x00000000,
354 (0x0e00 << 16) | (0xc208 >> 2),
355 0x00000000,
356 (0x0e00 << 16) | (0xc264 >> 2),
357 0x00000000,
358 (0x0e00 << 16) | (0xc268 >> 2),
359 0x00000000,
360 (0x0e00 << 16) | (0xc26c >> 2),
361 0x00000000,
362 (0x0e00 << 16) | (0xc270 >> 2),
363 0x00000000,
364 (0x0e00 << 16) | (0xc274 >> 2),
365 0x00000000,
366 (0x0e00 << 16) | (0xc278 >> 2),
367 0x00000000,
368 (0x0e00 << 16) | (0xc27c >> 2),
369 0x00000000,
370 (0x0e00 << 16) | (0xc280 >> 2),
371 0x00000000,
372 (0x0e00 << 16) | (0xc284 >> 2),
373 0x00000000,
374 (0x0e00 << 16) | (0xc288 >> 2),
375 0x00000000,
376 (0x0e00 << 16) | (0xc28c >> 2),
377 0x00000000,
378 (0x0e00 << 16) | (0xc290 >> 2),
379 0x00000000,
380 (0x0e00 << 16) | (0xc294 >> 2),
381 0x00000000,
382 (0x0e00 << 16) | (0xc298 >> 2),
383 0x00000000,
384 (0x0e00 << 16) | (0xc29c >> 2),
385 0x00000000,
386 (0x0e00 << 16) | (0xc2a0 >> 2),
387 0x00000000,
388 (0x0e00 << 16) | (0xc2a4 >> 2),
389 0x00000000,
390 (0x0e00 << 16) | (0xc2a8 >> 2),
391 0x00000000,
392 (0x0e00 << 16) | (0xc2ac >> 2),
393 0x00000000,
394 (0x0e00 << 16) | (0xc2b0 >> 2),
395 0x00000000,
396 (0x0e00 << 16) | (0x301d0 >> 2),
397 0x00000000,
398 (0x0e00 << 16) | (0x30238 >> 2),
399 0x00000000,
400 (0x0e00 << 16) | (0x30250 >> 2),
401 0x00000000,
402 (0x0e00 << 16) | (0x30254 >> 2),
403 0x00000000,
404 (0x0e00 << 16) | (0x30258 >> 2),
405 0x00000000,
406 (0x0e00 << 16) | (0x3025c >> 2),
407 0x00000000,
408 (0x4e00 << 16) | (0xc900 >> 2),
409 0x00000000,
410 (0x5e00 << 16) | (0xc900 >> 2),
411 0x00000000,
412 (0x6e00 << 16) | (0xc900 >> 2),
413 0x00000000,
414 (0x7e00 << 16) | (0xc900 >> 2),
415 0x00000000,
416 (0x8e00 << 16) | (0xc900 >> 2),
417 0x00000000,
418 (0x9e00 << 16) | (0xc900 >> 2),
419 0x00000000,
420 (0xae00 << 16) | (0xc900 >> 2),
421 0x00000000,
422 (0xbe00 << 16) | (0xc900 >> 2),
423 0x00000000,
424 (0x4e00 << 16) | (0xc904 >> 2),
425 0x00000000,
426 (0x5e00 << 16) | (0xc904 >> 2),
427 0x00000000,
428 (0x6e00 << 16) | (0xc904 >> 2),
429 0x00000000,
430 (0x7e00 << 16) | (0xc904 >> 2),
431 0x00000000,
432 (0x8e00 << 16) | (0xc904 >> 2),
433 0x00000000,
434 (0x9e00 << 16) | (0xc904 >> 2),
435 0x00000000,
436 (0xae00 << 16) | (0xc904 >> 2),
437 0x00000000,
438 (0xbe00 << 16) | (0xc904 >> 2),
439 0x00000000,
440 (0x4e00 << 16) | (0xc908 >> 2),
441 0x00000000,
442 (0x5e00 << 16) | (0xc908 >> 2),
443 0x00000000,
444 (0x6e00 << 16) | (0xc908 >> 2),
445 0x00000000,
446 (0x7e00 << 16) | (0xc908 >> 2),
447 0x00000000,
448 (0x8e00 << 16) | (0xc908 >> 2),
449 0x00000000,
450 (0x9e00 << 16) | (0xc908 >> 2),
451 0x00000000,
452 (0xae00 << 16) | (0xc908 >> 2),
453 0x00000000,
454 (0xbe00 << 16) | (0xc908 >> 2),
455 0x00000000,
456 (0x4e00 << 16) | (0xc90c >> 2),
457 0x00000000,
458 (0x5e00 << 16) | (0xc90c >> 2),
459 0x00000000,
460 (0x6e00 << 16) | (0xc90c >> 2),
461 0x00000000,
462 (0x7e00 << 16) | (0xc90c >> 2),
463 0x00000000,
464 (0x8e00 << 16) | (0xc90c >> 2),
465 0x00000000,
466 (0x9e00 << 16) | (0xc90c >> 2),
467 0x00000000,
468 (0xae00 << 16) | (0xc90c >> 2),
469 0x00000000,
470 (0xbe00 << 16) | (0xc90c >> 2),
471 0x00000000,
472 (0x4e00 << 16) | (0xc910 >> 2),
473 0x00000000,
474 (0x5e00 << 16) | (0xc910 >> 2),
475 0x00000000,
476 (0x6e00 << 16) | (0xc910 >> 2),
477 0x00000000,
478 (0x7e00 << 16) | (0xc910 >> 2),
479 0x00000000,
480 (0x8e00 << 16) | (0xc910 >> 2),
481 0x00000000,
482 (0x9e00 << 16) | (0xc910 >> 2),
483 0x00000000,
484 (0xae00 << 16) | (0xc910 >> 2),
485 0x00000000,
486 (0xbe00 << 16) | (0xc910 >> 2),
487 0x00000000,
488 (0x0e00 << 16) | (0xc99c >> 2),
489 0x00000000,
490 (0x0e00 << 16) | (0x9834 >> 2),
491 0x00000000,
492 (0x0000 << 16) | (0x30f00 >> 2),
493 0x00000000,
494 (0x0001 << 16) | (0x30f00 >> 2),
495 0x00000000,
496 (0x0000 << 16) | (0x30f04 >> 2),
497 0x00000000,
498 (0x0001 << 16) | (0x30f04 >> 2),
499 0x00000000,
500 (0x0000 << 16) | (0x30f08 >> 2),
501 0x00000000,
502 (0x0001 << 16) | (0x30f08 >> 2),
503 0x00000000,
504 (0x0000 << 16) | (0x30f0c >> 2),
505 0x00000000,
506 (0x0001 << 16) | (0x30f0c >> 2),
507 0x00000000,
508 (0x0600 << 16) | (0x9b7c >> 2),
509 0x00000000,
510 (0x0e00 << 16) | (0x8a14 >> 2),
511 0x00000000,
512 (0x0e00 << 16) | (0x8a18 >> 2),
513 0x00000000,
514 (0x0600 << 16) | (0x30a00 >> 2),
515 0x00000000,
516 (0x0e00 << 16) | (0x8bf0 >> 2),
517 0x00000000,
518 (0x0e00 << 16) | (0x8bcc >> 2),
519 0x00000000,
520 (0x0e00 << 16) | (0x8b24 >> 2),
521 0x00000000,
522 (0x0e00 << 16) | (0x30a04 >> 2),
523 0x00000000,
524 (0x0600 << 16) | (0x30a10 >> 2),
525 0x00000000,
526 (0x0600 << 16) | (0x30a14 >> 2),
527 0x00000000,
528 (0x0600 << 16) | (0x30a18 >> 2),
529 0x00000000,
530 (0x0600 << 16) | (0x30a2c >> 2),
531 0x00000000,
532 (0x0e00 << 16) | (0xc700 >> 2),
533 0x00000000,
534 (0x0e00 << 16) | (0xc704 >> 2),
535 0x00000000,
536 (0x0e00 << 16) | (0xc708 >> 2),
537 0x00000000,
538 (0x0e00 << 16) | (0xc768 >> 2),
539 0x00000000,
540 (0x0400 << 16) | (0xc770 >> 2),
541 0x00000000,
542 (0x0400 << 16) | (0xc774 >> 2),
543 0x00000000,
544 (0x0400 << 16) | (0xc778 >> 2),
545 0x00000000,
546 (0x0400 << 16) | (0xc77c >> 2),
547 0x00000000,
548 (0x0400 << 16) | (0xc780 >> 2),
549 0x00000000,
550 (0x0400 << 16) | (0xc784 >> 2),
551 0x00000000,
552 (0x0400 << 16) | (0xc788 >> 2),
553 0x00000000,
554 (0x0400 << 16) | (0xc78c >> 2),
555 0x00000000,
556 (0x0400 << 16) | (0xc798 >> 2),
557 0x00000000,
558 (0x0400 << 16) | (0xc79c >> 2),
559 0x00000000,
560 (0x0400 << 16) | (0xc7a0 >> 2),
561 0x00000000,
562 (0x0400 << 16) | (0xc7a4 >> 2),
563 0x00000000,
564 (0x0400 << 16) | (0xc7a8 >> 2),
565 0x00000000,
566 (0x0400 << 16) | (0xc7ac >> 2),
567 0x00000000,
568 (0x0400 << 16) | (0xc7b0 >> 2),
569 0x00000000,
570 (0x0400 << 16) | (0xc7b4 >> 2),
571 0x00000000,
572 (0x0e00 << 16) | (0x9100 >> 2),
573 0x00000000,
574 (0x0e00 << 16) | (0x3c010 >> 2),
575 0x00000000,
576 (0x0e00 << 16) | (0x92a8 >> 2),
577 0x00000000,
578 (0x0e00 << 16) | (0x92ac >> 2),
579 0x00000000,
580 (0x0e00 << 16) | (0x92b4 >> 2),
581 0x00000000,
582 (0x0e00 << 16) | (0x92b8 >> 2),
583 0x00000000,
584 (0x0e00 << 16) | (0x92bc >> 2),
585 0x00000000,
586 (0x0e00 << 16) | (0x92c0 >> 2),
587 0x00000000,
588 (0x0e00 << 16) | (0x92c4 >> 2),
589 0x00000000,
590 (0x0e00 << 16) | (0x92c8 >> 2),
591 0x00000000,
592 (0x0e00 << 16) | (0x92cc >> 2),
593 0x00000000,
594 (0x0e00 << 16) | (0x92d0 >> 2),
595 0x00000000,
596 (0x0e00 << 16) | (0x8c00 >> 2),
597 0x00000000,
598 (0x0e00 << 16) | (0x8c04 >> 2),
599 0x00000000,
600 (0x0e00 << 16) | (0x8c20 >> 2),
601 0x00000000,
602 (0x0e00 << 16) | (0x8c38 >> 2),
603 0x00000000,
604 (0x0e00 << 16) | (0x8c3c >> 2),
605 0x00000000,
606 (0x0e00 << 16) | (0xae00 >> 2),
607 0x00000000,
608 (0x0e00 << 16) | (0x9604 >> 2),
609 0x00000000,
610 (0x0e00 << 16) | (0xac08 >> 2),
611 0x00000000,
612 (0x0e00 << 16) | (0xac0c >> 2),
613 0x00000000,
614 (0x0e00 << 16) | (0xac10 >> 2),
615 0x00000000,
616 (0x0e00 << 16) | (0xac14 >> 2),
617 0x00000000,
618 (0x0e00 << 16) | (0xac58 >> 2),
619 0x00000000,
620 (0x0e00 << 16) | (0xac68 >> 2),
621 0x00000000,
622 (0x0e00 << 16) | (0xac6c >> 2),
623 0x00000000,
624 (0x0e00 << 16) | (0xac70 >> 2),
625 0x00000000,
626 (0x0e00 << 16) | (0xac74 >> 2),
627 0x00000000,
628 (0x0e00 << 16) | (0xac78 >> 2),
629 0x00000000,
630 (0x0e00 << 16) | (0xac7c >> 2),
631 0x00000000,
632 (0x0e00 << 16) | (0xac80 >> 2),
633 0x00000000,
634 (0x0e00 << 16) | (0xac84 >> 2),
635 0x00000000,
636 (0x0e00 << 16) | (0xac88 >> 2),
637 0x00000000,
638 (0x0e00 << 16) | (0xac8c >> 2),
639 0x00000000,
640 (0x0e00 << 16) | (0x970c >> 2),
641 0x00000000,
642 (0x0e00 << 16) | (0x9714 >> 2),
643 0x00000000,
644 (0x0e00 << 16) | (0x9718 >> 2),
645 0x00000000,
646 (0x0e00 << 16) | (0x971c >> 2),
647 0x00000000,
648 (0x0e00 << 16) | (0x31068 >> 2),
649 0x00000000,
650 (0x4e00 << 16) | (0x31068 >> 2),
651 0x00000000,
652 (0x5e00 << 16) | (0x31068 >> 2),
653 0x00000000,
654 (0x6e00 << 16) | (0x31068 >> 2),
655 0x00000000,
656 (0x7e00 << 16) | (0x31068 >> 2),
657 0x00000000,
658 (0x8e00 << 16) | (0x31068 >> 2),
659 0x00000000,
660 (0x9e00 << 16) | (0x31068 >> 2),
661 0x00000000,
662 (0xae00 << 16) | (0x31068 >> 2),
663 0x00000000,
664 (0xbe00 << 16) | (0x31068 >> 2),
665 0x00000000,
666 (0x0e00 << 16) | (0xcd10 >> 2),
667 0x00000000,
668 (0x0e00 << 16) | (0xcd14 >> 2),
669 0x00000000,
670 (0x0e00 << 16) | (0x88b0 >> 2),
671 0x00000000,
672 (0x0e00 << 16) | (0x88b4 >> 2),
673 0x00000000,
674 (0x0e00 << 16) | (0x88b8 >> 2),
675 0x00000000,
676 (0x0e00 << 16) | (0x88bc >> 2),
677 0x00000000,
678 (0x0400 << 16) | (0x89c0 >> 2),
679 0x00000000,
680 (0x0e00 << 16) | (0x88c4 >> 2),
681 0x00000000,
682 (0x0e00 << 16) | (0x88c8 >> 2),
683 0x00000000,
684 (0x0e00 << 16) | (0x88d0 >> 2),
685 0x00000000,
686 (0x0e00 << 16) | (0x88d4 >> 2),
687 0x00000000,
688 (0x0e00 << 16) | (0x88d8 >> 2),
689 0x00000000,
690 (0x0e00 << 16) | (0x8980 >> 2),
691 0x00000000,
692 (0x0e00 << 16) | (0x30938 >> 2),
693 0x00000000,
694 (0x0e00 << 16) | (0x3093c >> 2),
695 0x00000000,
696 (0x0e00 << 16) | (0x30940 >> 2),
697 0x00000000,
698 (0x0e00 << 16) | (0x89a0 >> 2),
699 0x00000000,
700 (0x0e00 << 16) | (0x30900 >> 2),
701 0x00000000,
702 (0x0e00 << 16) | (0x30904 >> 2),
703 0x00000000,
704 (0x0e00 << 16) | (0x89b4 >> 2),
705 0x00000000,
706 (0x0e00 << 16) | (0x3c210 >> 2),
707 0x00000000,
708 (0x0e00 << 16) | (0x3c214 >> 2),
709 0x00000000,
710 (0x0e00 << 16) | (0x3c218 >> 2),
711 0x00000000,
712 (0x0e00 << 16) | (0x8904 >> 2),
713 0x00000000,
714 0x5,
715 (0x0e00 << 16) | (0x8c28 >> 2),
716 (0x0e00 << 16) | (0x8c2c >> 2),
717 (0x0e00 << 16) | (0x8c30 >> 2),
718 (0x0e00 << 16) | (0x8c34 >> 2),
719 (0x0e00 << 16) | (0x9600 >> 2),
720};
721
722static const u32 kalindi_rlc_save_restore_register_list[] =
723{
724 (0x0e00 << 16) | (0xc12c >> 2),
725 0x00000000,
726 (0x0e00 << 16) | (0xc140 >> 2),
727 0x00000000,
728 (0x0e00 << 16) | (0xc150 >> 2),
729 0x00000000,
730 (0x0e00 << 16) | (0xc15c >> 2),
731 0x00000000,
732 (0x0e00 << 16) | (0xc168 >> 2),
733 0x00000000,
734 (0x0e00 << 16) | (0xc170 >> 2),
735 0x00000000,
736 (0x0e00 << 16) | (0xc204 >> 2),
737 0x00000000,
738 (0x0e00 << 16) | (0xc2b4 >> 2),
739 0x00000000,
740 (0x0e00 << 16) | (0xc2b8 >> 2),
741 0x00000000,
742 (0x0e00 << 16) | (0xc2bc >> 2),
743 0x00000000,
744 (0x0e00 << 16) | (0xc2c0 >> 2),
745 0x00000000,
746 (0x0e00 << 16) | (0x8228 >> 2),
747 0x00000000,
748 (0x0e00 << 16) | (0x829c >> 2),
749 0x00000000,
750 (0x0e00 << 16) | (0x869c >> 2),
751 0x00000000,
752 (0x0600 << 16) | (0x98f4 >> 2),
753 0x00000000,
754 (0x0e00 << 16) | (0x98f8 >> 2),
755 0x00000000,
756 (0x0e00 << 16) | (0x9900 >> 2),
757 0x00000000,
758 (0x0e00 << 16) | (0xc260 >> 2),
759 0x00000000,
760 (0x0e00 << 16) | (0x90e8 >> 2),
761 0x00000000,
762 (0x0e00 << 16) | (0x3c000 >> 2),
763 0x00000000,
764 (0x0e00 << 16) | (0x3c00c >> 2),
765 0x00000000,
766 (0x0e00 << 16) | (0x8c1c >> 2),
767 0x00000000,
768 (0x0e00 << 16) | (0x9700 >> 2),
769 0x00000000,
770 (0x0e00 << 16) | (0xcd20 >> 2),
771 0x00000000,
772 (0x4e00 << 16) | (0xcd20 >> 2),
773 0x00000000,
774 (0x5e00 << 16) | (0xcd20 >> 2),
775 0x00000000,
776 (0x6e00 << 16) | (0xcd20 >> 2),
777 0x00000000,
778 (0x7e00 << 16) | (0xcd20 >> 2),
779 0x00000000,
780 (0x0e00 << 16) | (0x89bc >> 2),
781 0x00000000,
782 (0x0e00 << 16) | (0x8900 >> 2),
783 0x00000000,
784 0x3,
785 (0x0e00 << 16) | (0xc130 >> 2),
786 0x00000000,
787 (0x0e00 << 16) | (0xc134 >> 2),
788 0x00000000,
789 (0x0e00 << 16) | (0xc1fc >> 2),
790 0x00000000,
791 (0x0e00 << 16) | (0xc208 >> 2),
792 0x00000000,
793 (0x0e00 << 16) | (0xc264 >> 2),
794 0x00000000,
795 (0x0e00 << 16) | (0xc268 >> 2),
796 0x00000000,
797 (0x0e00 << 16) | (0xc26c >> 2),
798 0x00000000,
799 (0x0e00 << 16) | (0xc270 >> 2),
800 0x00000000,
801 (0x0e00 << 16) | (0xc274 >> 2),
802 0x00000000,
803 (0x0e00 << 16) | (0xc28c >> 2),
804 0x00000000,
805 (0x0e00 << 16) | (0xc290 >> 2),
806 0x00000000,
807 (0x0e00 << 16) | (0xc294 >> 2),
808 0x00000000,
809 (0x0e00 << 16) | (0xc298 >> 2),
810 0x00000000,
811 (0x0e00 << 16) | (0xc2a0 >> 2),
812 0x00000000,
813 (0x0e00 << 16) | (0xc2a4 >> 2),
814 0x00000000,
815 (0x0e00 << 16) | (0xc2a8 >> 2),
816 0x00000000,
817 (0x0e00 << 16) | (0xc2ac >> 2),
818 0x00000000,
819 (0x0e00 << 16) | (0x301d0 >> 2),
820 0x00000000,
821 (0x0e00 << 16) | (0x30238 >> 2),
822 0x00000000,
823 (0x0e00 << 16) | (0x30250 >> 2),
824 0x00000000,
825 (0x0e00 << 16) | (0x30254 >> 2),
826 0x00000000,
827 (0x0e00 << 16) | (0x30258 >> 2),
828 0x00000000,
829 (0x0e00 << 16) | (0x3025c >> 2),
830 0x00000000,
831 (0x4e00 << 16) | (0xc900 >> 2),
832 0x00000000,
833 (0x5e00 << 16) | (0xc900 >> 2),
834 0x00000000,
835 (0x6e00 << 16) | (0xc900 >> 2),
836 0x00000000,
837 (0x7e00 << 16) | (0xc900 >> 2),
838 0x00000000,
839 (0x4e00 << 16) | (0xc904 >> 2),
840 0x00000000,
841 (0x5e00 << 16) | (0xc904 >> 2),
842 0x00000000,
843 (0x6e00 << 16) | (0xc904 >> 2),
844 0x00000000,
845 (0x7e00 << 16) | (0xc904 >> 2),
846 0x00000000,
847 (0x4e00 << 16) | (0xc908 >> 2),
848 0x00000000,
849 (0x5e00 << 16) | (0xc908 >> 2),
850 0x00000000,
851 (0x6e00 << 16) | (0xc908 >> 2),
852 0x00000000,
853 (0x7e00 << 16) | (0xc908 >> 2),
854 0x00000000,
855 (0x4e00 << 16) | (0xc90c >> 2),
856 0x00000000,
857 (0x5e00 << 16) | (0xc90c >> 2),
858 0x00000000,
859 (0x6e00 << 16) | (0xc90c >> 2),
860 0x00000000,
861 (0x7e00 << 16) | (0xc90c >> 2),
862 0x00000000,
863 (0x4e00 << 16) | (0xc910 >> 2),
864 0x00000000,
865 (0x5e00 << 16) | (0xc910 >> 2),
866 0x00000000,
867 (0x6e00 << 16) | (0xc910 >> 2),
868 0x00000000,
869 (0x7e00 << 16) | (0xc910 >> 2),
870 0x00000000,
871 (0x0e00 << 16) | (0xc99c >> 2),
872 0x00000000,
873 (0x0e00 << 16) | (0x9834 >> 2),
874 0x00000000,
875 (0x0000 << 16) | (0x30f00 >> 2),
876 0x00000000,
877 (0x0000 << 16) | (0x30f04 >> 2),
878 0x00000000,
879 (0x0000 << 16) | (0x30f08 >> 2),
880 0x00000000,
881 (0x0000 << 16) | (0x30f0c >> 2),
882 0x00000000,
883 (0x0600 << 16) | (0x9b7c >> 2),
884 0x00000000,
885 (0x0e00 << 16) | (0x8a14 >> 2),
886 0x00000000,
887 (0x0e00 << 16) | (0x8a18 >> 2),
888 0x00000000,
889 (0x0600 << 16) | (0x30a00 >> 2),
890 0x00000000,
891 (0x0e00 << 16) | (0x8bf0 >> 2),
892 0x00000000,
893 (0x0e00 << 16) | (0x8bcc >> 2),
894 0x00000000,
895 (0x0e00 << 16) | (0x8b24 >> 2),
896 0x00000000,
897 (0x0e00 << 16) | (0x30a04 >> 2),
898 0x00000000,
899 (0x0600 << 16) | (0x30a10 >> 2),
900 0x00000000,
901 (0x0600 << 16) | (0x30a14 >> 2),
902 0x00000000,
903 (0x0600 << 16) | (0x30a18 >> 2),
904 0x00000000,
905 (0x0600 << 16) | (0x30a2c >> 2),
906 0x00000000,
907 (0x0e00 << 16) | (0xc700 >> 2),
908 0x00000000,
909 (0x0e00 << 16) | (0xc704 >> 2),
910 0x00000000,
911 (0x0e00 << 16) | (0xc708 >> 2),
912 0x00000000,
913 (0x0e00 << 16) | (0xc768 >> 2),
914 0x00000000,
915 (0x0400 << 16) | (0xc770 >> 2),
916 0x00000000,
917 (0x0400 << 16) | (0xc774 >> 2),
918 0x00000000,
919 (0x0400 << 16) | (0xc798 >> 2),
920 0x00000000,
921 (0x0400 << 16) | (0xc79c >> 2),
922 0x00000000,
923 (0x0e00 << 16) | (0x9100 >> 2),
924 0x00000000,
925 (0x0e00 << 16) | (0x3c010 >> 2),
926 0x00000000,
927 (0x0e00 << 16) | (0x8c00 >> 2),
928 0x00000000,
929 (0x0e00 << 16) | (0x8c04 >> 2),
930 0x00000000,
931 (0x0e00 << 16) | (0x8c20 >> 2),
932 0x00000000,
933 (0x0e00 << 16) | (0x8c38 >> 2),
934 0x00000000,
935 (0x0e00 << 16) | (0x8c3c >> 2),
936 0x00000000,
937 (0x0e00 << 16) | (0xae00 >> 2),
938 0x00000000,
939 (0x0e00 << 16) | (0x9604 >> 2),
940 0x00000000,
941 (0x0e00 << 16) | (0xac08 >> 2),
942 0x00000000,
943 (0x0e00 << 16) | (0xac0c >> 2),
944 0x00000000,
945 (0x0e00 << 16) | (0xac10 >> 2),
946 0x00000000,
947 (0x0e00 << 16) | (0xac14 >> 2),
948 0x00000000,
949 (0x0e00 << 16) | (0xac58 >> 2),
950 0x00000000,
951 (0x0e00 << 16) | (0xac68 >> 2),
952 0x00000000,
953 (0x0e00 << 16) | (0xac6c >> 2),
954 0x00000000,
955 (0x0e00 << 16) | (0xac70 >> 2),
956 0x00000000,
957 (0x0e00 << 16) | (0xac74 >> 2),
958 0x00000000,
959 (0x0e00 << 16) | (0xac78 >> 2),
960 0x00000000,
961 (0x0e00 << 16) | (0xac7c >> 2),
962 0x00000000,
963 (0x0e00 << 16) | (0xac80 >> 2),
964 0x00000000,
965 (0x0e00 << 16) | (0xac84 >> 2),
966 0x00000000,
967 (0x0e00 << 16) | (0xac88 >> 2),
968 0x00000000,
969 (0x0e00 << 16) | (0xac8c >> 2),
970 0x00000000,
971 (0x0e00 << 16) | (0x970c >> 2),
972 0x00000000,
973 (0x0e00 << 16) | (0x9714 >> 2),
974 0x00000000,
975 (0x0e00 << 16) | (0x9718 >> 2),
976 0x00000000,
977 (0x0e00 << 16) | (0x971c >> 2),
978 0x00000000,
979 (0x0e00 << 16) | (0x31068 >> 2),
980 0x00000000,
981 (0x4e00 << 16) | (0x31068 >> 2),
982 0x00000000,
983 (0x5e00 << 16) | (0x31068 >> 2),
984 0x00000000,
985 (0x6e00 << 16) | (0x31068 >> 2),
986 0x00000000,
987 (0x7e00 << 16) | (0x31068 >> 2),
988 0x00000000,
989 (0x0e00 << 16) | (0xcd10 >> 2),
990 0x00000000,
991 (0x0e00 << 16) | (0xcd14 >> 2),
992 0x00000000,
993 (0x0e00 << 16) | (0x88b0 >> 2),
994 0x00000000,
995 (0x0e00 << 16) | (0x88b4 >> 2),
996 0x00000000,
997 (0x0e00 << 16) | (0x88b8 >> 2),
998 0x00000000,
999 (0x0e00 << 16) | (0x88bc >> 2),
1000 0x00000000,
1001 (0x0400 << 16) | (0x89c0 >> 2),
1002 0x00000000,
1003 (0x0e00 << 16) | (0x88c4 >> 2),
1004 0x00000000,
1005 (0x0e00 << 16) | (0x88c8 >> 2),
1006 0x00000000,
1007 (0x0e00 << 16) | (0x88d0 >> 2),
1008 0x00000000,
1009 (0x0e00 << 16) | (0x88d4 >> 2),
1010 0x00000000,
1011 (0x0e00 << 16) | (0x88d8 >> 2),
1012 0x00000000,
1013 (0x0e00 << 16) | (0x8980 >> 2),
1014 0x00000000,
1015 (0x0e00 << 16) | (0x30938 >> 2),
1016 0x00000000,
1017 (0x0e00 << 16) | (0x3093c >> 2),
1018 0x00000000,
1019 (0x0e00 << 16) | (0x30940 >> 2),
1020 0x00000000,
1021 (0x0e00 << 16) | (0x89a0 >> 2),
1022 0x00000000,
1023 (0x0e00 << 16) | (0x30900 >> 2),
1024 0x00000000,
1025 (0x0e00 << 16) | (0x30904 >> 2),
1026 0x00000000,
1027 (0x0e00 << 16) | (0x89b4 >> 2),
1028 0x00000000,
1029 (0x0e00 << 16) | (0x3e1fc >> 2),
1030 0x00000000,
1031 (0x0e00 << 16) | (0x3c210 >> 2),
1032 0x00000000,
1033 (0x0e00 << 16) | (0x3c214 >> 2),
1034 0x00000000,
1035 (0x0e00 << 16) | (0x3c218 >> 2),
1036 0x00000000,
1037 (0x0e00 << 16) | (0x8904 >> 2),
1038 0x00000000,
1039 0x5,
1040 (0x0e00 << 16) | (0x8c28 >> 2),
1041 (0x0e00 << 16) | (0x8c2c >> 2),
1042 (0x0e00 << 16) | (0x8c30 >> 2),
1043 (0x0e00 << 16) | (0x8c34 >> 2),
1044 (0x0e00 << 16) | (0x9600 >> 2),
1045};
1046
1047static const u32 bonaire_golden_spm_registers[] =
1048{
1049 0x30800, 0xe0ffffff, 0xe0000000
1050};
1051
1052static const u32 bonaire_golden_common_registers[] =
1053{
1054 0xc770, 0xffffffff, 0x00000800,
1055 0xc774, 0xffffffff, 0x00000800,
1056 0xc798, 0xffffffff, 0x00007fbf,
1057 0xc79c, 0xffffffff, 0x00007faf
1058};
1059
1060static const u32 bonaire_golden_registers[] =
1061{
1062 0x3354, 0x00000333, 0x00000333,
1063 0x3350, 0x000c0fc0, 0x00040200,
1064 0x9a10, 0x00010000, 0x00058208,
1065 0x3c000, 0xffff1fff, 0x00140000,
1066 0x3c200, 0xfdfc0fff, 0x00000100,
1067 0x3c234, 0x40000000, 0x40000200,
1068 0x9830, 0xffffffff, 0x00000000,
1069 0x9834, 0xf00fffff, 0x00000400,
1070 0x9838, 0x0002021c, 0x00020200,
1071 0xc78, 0x00000080, 0x00000000,
1072 0x5bb0, 0x000000f0, 0x00000070,
1073 0x5bc0, 0xf0311fff, 0x80300000,
1074 0x98f8, 0x73773777, 0x12010001,
1075 0x350c, 0x00810000, 0x408af000,
1076 0x7030, 0x31000111, 0x00000011,
1077 0x2f48, 0x73773777, 0x12010001,
1078 0x220c, 0x00007fb6, 0x0021a1b1,
1079 0x2210, 0x00007fb6, 0x002021b1,
1080 0x2180, 0x00007fb6, 0x00002191,
1081 0x2218, 0x00007fb6, 0x002121b1,
1082 0x221c, 0x00007fb6, 0x002021b1,
1083 0x21dc, 0x00007fb6, 0x00002191,
1084 0x21e0, 0x00007fb6, 0x00002191,
1085 0x3628, 0x0000003f, 0x0000000a,
1086 0x362c, 0x0000003f, 0x0000000a,
1087 0x2ae4, 0x00073ffe, 0x000022a2,
1088 0x240c, 0x000007ff, 0x00000000,
1089 0x8a14, 0xf000003f, 0x00000007,
1090 0x8bf0, 0x00002001, 0x00000001,
1091 0x8b24, 0xffffffff, 0x00ffffff,
1092 0x30a04, 0x0000ff0f, 0x00000000,
1093 0x28a4c, 0x07ffffff, 0x06000000,
1094 0x4d8, 0x00000fff, 0x00000100,
1095 0x3e78, 0x00000001, 0x00000002,
1096 0x9100, 0x03000000, 0x0362c688,
1097 0x8c00, 0x000000ff, 0x00000001,
1098 0xe40, 0x00001fff, 0x00001fff,
1099 0x9060, 0x0000007f, 0x00000020,
1100 0x9508, 0x00010000, 0x00010000,
1101 0xac14, 0x000003ff, 0x000000f3,
1102 0xac0c, 0xffffffff, 0x00001032
1103};
1104
1105static const u32 bonaire_mgcg_cgcg_init[] =
1106{
1107 0xc420, 0xffffffff, 0xfffffffc,
1108 0x30800, 0xffffffff, 0xe0000000,
1109 0x3c2a0, 0xffffffff, 0x00000100,
1110 0x3c208, 0xffffffff, 0x00000100,
1111 0x3c2c0, 0xffffffff, 0xc0000100,
1112 0x3c2c8, 0xffffffff, 0xc0000100,
1113 0x3c2c4, 0xffffffff, 0xc0000100,
1114 0x55e4, 0xffffffff, 0x00600100,
1115 0x3c280, 0xffffffff, 0x00000100,
1116 0x3c214, 0xffffffff, 0x06000100,
1117 0x3c220, 0xffffffff, 0x00000100,
1118 0x3c218, 0xffffffff, 0x06000100,
1119 0x3c204, 0xffffffff, 0x00000100,
1120 0x3c2e0, 0xffffffff, 0x00000100,
1121 0x3c224, 0xffffffff, 0x00000100,
1122 0x3c200, 0xffffffff, 0x00000100,
1123 0x3c230, 0xffffffff, 0x00000100,
1124 0x3c234, 0xffffffff, 0x00000100,
1125 0x3c250, 0xffffffff, 0x00000100,
1126 0x3c254, 0xffffffff, 0x00000100,
1127 0x3c258, 0xffffffff, 0x00000100,
1128 0x3c25c, 0xffffffff, 0x00000100,
1129 0x3c260, 0xffffffff, 0x00000100,
1130 0x3c27c, 0xffffffff, 0x00000100,
1131 0x3c278, 0xffffffff, 0x00000100,
1132 0x3c210, 0xffffffff, 0x06000100,
1133 0x3c290, 0xffffffff, 0x00000100,
1134 0x3c274, 0xffffffff, 0x00000100,
1135 0x3c2b4, 0xffffffff, 0x00000100,
1136 0x3c2b0, 0xffffffff, 0x00000100,
1137 0x3c270, 0xffffffff, 0x00000100,
1138 0x30800, 0xffffffff, 0xe0000000,
1139 0x3c020, 0xffffffff, 0x00010000,
1140 0x3c024, 0xffffffff, 0x00030002,
1141 0x3c028, 0xffffffff, 0x00040007,
1142 0x3c02c, 0xffffffff, 0x00060005,
1143 0x3c030, 0xffffffff, 0x00090008,
1144 0x3c034, 0xffffffff, 0x00010000,
1145 0x3c038, 0xffffffff, 0x00030002,
1146 0x3c03c, 0xffffffff, 0x00040007,
1147 0x3c040, 0xffffffff, 0x00060005,
1148 0x3c044, 0xffffffff, 0x00090008,
1149 0x3c048, 0xffffffff, 0x00010000,
1150 0x3c04c, 0xffffffff, 0x00030002,
1151 0x3c050, 0xffffffff, 0x00040007,
1152 0x3c054, 0xffffffff, 0x00060005,
1153 0x3c058, 0xffffffff, 0x00090008,
1154 0x3c05c, 0xffffffff, 0x00010000,
1155 0x3c060, 0xffffffff, 0x00030002,
1156 0x3c064, 0xffffffff, 0x00040007,
1157 0x3c068, 0xffffffff, 0x00060005,
1158 0x3c06c, 0xffffffff, 0x00090008,
1159 0x3c070, 0xffffffff, 0x00010000,
1160 0x3c074, 0xffffffff, 0x00030002,
1161 0x3c078, 0xffffffff, 0x00040007,
1162 0x3c07c, 0xffffffff, 0x00060005,
1163 0x3c080, 0xffffffff, 0x00090008,
1164 0x3c084, 0xffffffff, 0x00010000,
1165 0x3c088, 0xffffffff, 0x00030002,
1166 0x3c08c, 0xffffffff, 0x00040007,
1167 0x3c090, 0xffffffff, 0x00060005,
1168 0x3c094, 0xffffffff, 0x00090008,
1169 0x3c098, 0xffffffff, 0x00010000,
1170 0x3c09c, 0xffffffff, 0x00030002,
1171 0x3c0a0, 0xffffffff, 0x00040007,
1172 0x3c0a4, 0xffffffff, 0x00060005,
1173 0x3c0a8, 0xffffffff, 0x00090008,
1174 0x3c000, 0xffffffff, 0x96e00200,
1175 0x8708, 0xffffffff, 0x00900100,
1176 0xc424, 0xffffffff, 0x0020003f,
1177 0x38, 0xffffffff, 0x0140001c,
1178 0x3c, 0x000f0000, 0x000f0000,
1179 0x220, 0xffffffff, 0xC060000C,
1180 0x224, 0xc0000fff, 0x00000100,
1181 0xf90, 0xffffffff, 0x00000100,
1182 0xf98, 0x00000101, 0x00000000,
1183 0x20a8, 0xffffffff, 0x00000104,
1184 0x55e4, 0xff000fff, 0x00000100,
1185 0x30cc, 0xc0000fff, 0x00000104,
1186 0xc1e4, 0x00000001, 0x00000001,
1187 0xd00c, 0xff000ff0, 0x00000100,
1188 0xd80c, 0xff000ff0, 0x00000100
1189};
1190
1191static const u32 spectre_golden_spm_registers[] =
1192{
1193 0x30800, 0xe0ffffff, 0xe0000000
1194};
1195
1196static const u32 spectre_golden_common_registers[] =
1197{
1198 0xc770, 0xffffffff, 0x00000800,
1199 0xc774, 0xffffffff, 0x00000800,
1200 0xc798, 0xffffffff, 0x00007fbf,
1201 0xc79c, 0xffffffff, 0x00007faf
1202};
1203
1204static const u32 spectre_golden_registers[] =
1205{
1206 0x3c000, 0xffff1fff, 0x96940200,
1207 0x3c00c, 0xffff0001, 0xff000000,
1208 0x3c200, 0xfffc0fff, 0x00000100,
1209 0x6ed8, 0x00010101, 0x00010000,
1210 0x9834, 0xf00fffff, 0x00000400,
1211 0x9838, 0xfffffffc, 0x00020200,
1212 0x5bb0, 0x000000f0, 0x00000070,
1213 0x5bc0, 0xf0311fff, 0x80300000,
1214 0x98f8, 0x73773777, 0x12010001,
1215 0x9b7c, 0x00ff0000, 0x00fc0000,
1216 0x2f48, 0x73773777, 0x12010001,
1217 0x8a14, 0xf000003f, 0x00000007,
1218 0x8b24, 0xffffffff, 0x00ffffff,
1219 0x28350, 0x3f3f3fff, 0x00000082,
1220 0x28354, 0x0000003f, 0x00000000,
1221 0x3e78, 0x00000001, 0x00000002,
1222 0x913c, 0xffff03df, 0x00000004,
1223 0xc768, 0x00000008, 0x00000008,
1224 0x8c00, 0x000008ff, 0x00000800,
1225 0x9508, 0x00010000, 0x00010000,
1226 0xac0c, 0xffffffff, 0x54763210,
1227 0x214f8, 0x01ff01ff, 0x00000002,
1228 0x21498, 0x007ff800, 0x00200000,
1229 0x2015c, 0xffffffff, 0x00000f40,
1230 0x30934, 0xffffffff, 0x00000001
1231};
1232
1233static const u32 spectre_mgcg_cgcg_init[] =
1234{
1235 0xc420, 0xffffffff, 0xfffffffc,
1236 0x30800, 0xffffffff, 0xe0000000,
1237 0x3c2a0, 0xffffffff, 0x00000100,
1238 0x3c208, 0xffffffff, 0x00000100,
1239 0x3c2c0, 0xffffffff, 0x00000100,
1240 0x3c2c8, 0xffffffff, 0x00000100,
1241 0x3c2c4, 0xffffffff, 0x00000100,
1242 0x55e4, 0xffffffff, 0x00600100,
1243 0x3c280, 0xffffffff, 0x00000100,
1244 0x3c214, 0xffffffff, 0x06000100,
1245 0x3c220, 0xffffffff, 0x00000100,
1246 0x3c218, 0xffffffff, 0x06000100,
1247 0x3c204, 0xffffffff, 0x00000100,
1248 0x3c2e0, 0xffffffff, 0x00000100,
1249 0x3c224, 0xffffffff, 0x00000100,
1250 0x3c200, 0xffffffff, 0x00000100,
1251 0x3c230, 0xffffffff, 0x00000100,
1252 0x3c234, 0xffffffff, 0x00000100,
1253 0x3c250, 0xffffffff, 0x00000100,
1254 0x3c254, 0xffffffff, 0x00000100,
1255 0x3c258, 0xffffffff, 0x00000100,
1256 0x3c25c, 0xffffffff, 0x00000100,
1257 0x3c260, 0xffffffff, 0x00000100,
1258 0x3c27c, 0xffffffff, 0x00000100,
1259 0x3c278, 0xffffffff, 0x00000100,
1260 0x3c210, 0xffffffff, 0x06000100,
1261 0x3c290, 0xffffffff, 0x00000100,
1262 0x3c274, 0xffffffff, 0x00000100,
1263 0x3c2b4, 0xffffffff, 0x00000100,
1264 0x3c2b0, 0xffffffff, 0x00000100,
1265 0x3c270, 0xffffffff, 0x00000100,
1266 0x30800, 0xffffffff, 0xe0000000,
1267 0x3c020, 0xffffffff, 0x00010000,
1268 0x3c024, 0xffffffff, 0x00030002,
1269 0x3c028, 0xffffffff, 0x00040007,
1270 0x3c02c, 0xffffffff, 0x00060005,
1271 0x3c030, 0xffffffff, 0x00090008,
1272 0x3c034, 0xffffffff, 0x00010000,
1273 0x3c038, 0xffffffff, 0x00030002,
1274 0x3c03c, 0xffffffff, 0x00040007,
1275 0x3c040, 0xffffffff, 0x00060005,
1276 0x3c044, 0xffffffff, 0x00090008,
1277 0x3c048, 0xffffffff, 0x00010000,
1278 0x3c04c, 0xffffffff, 0x00030002,
1279 0x3c050, 0xffffffff, 0x00040007,
1280 0x3c054, 0xffffffff, 0x00060005,
1281 0x3c058, 0xffffffff, 0x00090008,
1282 0x3c05c, 0xffffffff, 0x00010000,
1283 0x3c060, 0xffffffff, 0x00030002,
1284 0x3c064, 0xffffffff, 0x00040007,
1285 0x3c068, 0xffffffff, 0x00060005,
1286 0x3c06c, 0xffffffff, 0x00090008,
1287 0x3c070, 0xffffffff, 0x00010000,
1288 0x3c074, 0xffffffff, 0x00030002,
1289 0x3c078, 0xffffffff, 0x00040007,
1290 0x3c07c, 0xffffffff, 0x00060005,
1291 0x3c080, 0xffffffff, 0x00090008,
1292 0x3c084, 0xffffffff, 0x00010000,
1293 0x3c088, 0xffffffff, 0x00030002,
1294 0x3c08c, 0xffffffff, 0x00040007,
1295 0x3c090, 0xffffffff, 0x00060005,
1296 0x3c094, 0xffffffff, 0x00090008,
1297 0x3c098, 0xffffffff, 0x00010000,
1298 0x3c09c, 0xffffffff, 0x00030002,
1299 0x3c0a0, 0xffffffff, 0x00040007,
1300 0x3c0a4, 0xffffffff, 0x00060005,
1301 0x3c0a8, 0xffffffff, 0x00090008,
1302 0x3c0ac, 0xffffffff, 0x00010000,
1303 0x3c0b0, 0xffffffff, 0x00030002,
1304 0x3c0b4, 0xffffffff, 0x00040007,
1305 0x3c0b8, 0xffffffff, 0x00060005,
1306 0x3c0bc, 0xffffffff, 0x00090008,
1307 0x3c000, 0xffffffff, 0x96e00200,
1308 0x8708, 0xffffffff, 0x00900100,
1309 0xc424, 0xffffffff, 0x0020003f,
1310 0x38, 0xffffffff, 0x0140001c,
1311 0x3c, 0x000f0000, 0x000f0000,
1312 0x220, 0xffffffff, 0xC060000C,
1313 0x224, 0xc0000fff, 0x00000100,
1314 0xf90, 0xffffffff, 0x00000100,
1315 0xf98, 0x00000101, 0x00000000,
1316 0x20a8, 0xffffffff, 0x00000104,
1317 0x55e4, 0xff000fff, 0x00000100,
1318 0x30cc, 0xc0000fff, 0x00000104,
1319 0xc1e4, 0x00000001, 0x00000001,
1320 0xd00c, 0xff000ff0, 0x00000100,
1321 0xd80c, 0xff000ff0, 0x00000100
1322};
1323
1324static const u32 kalindi_golden_spm_registers[] =
1325{
1326 0x30800, 0xe0ffffff, 0xe0000000
1327};
1328
1329static const u32 kalindi_golden_common_registers[] =
1330{
1331 0xc770, 0xffffffff, 0x00000800,
1332 0xc774, 0xffffffff, 0x00000800,
1333 0xc798, 0xffffffff, 0x00007fbf,
1334 0xc79c, 0xffffffff, 0x00007faf
1335};
1336
1337static const u32 kalindi_golden_registers[] =
1338{
1339 0x3c000, 0xffffdfff, 0x6e944040,
1340 0x55e4, 0xff607fff, 0xfc000100,
1341 0x3c220, 0xff000fff, 0x00000100,
1342 0x3c224, 0xff000fff, 0x00000100,
1343 0x3c200, 0xfffc0fff, 0x00000100,
1344 0x6ed8, 0x00010101, 0x00010000,
1345 0x9830, 0xffffffff, 0x00000000,
1346 0x9834, 0xf00fffff, 0x00000400,
1347 0x5bb0, 0x000000f0, 0x00000070,
1348 0x5bc0, 0xf0311fff, 0x80300000,
1349 0x98f8, 0x73773777, 0x12010001,
1350 0x98fc, 0xffffffff, 0x00000010,
1351 0x9b7c, 0x00ff0000, 0x00fc0000,
1352 0x8030, 0x00001f0f, 0x0000100a,
1353 0x2f48, 0x73773777, 0x12010001,
1354 0x2408, 0x000fffff, 0x000c007f,
1355 0x8a14, 0xf000003f, 0x00000007,
1356 0x8b24, 0x3fff3fff, 0x00ffcfff,
1357 0x30a04, 0x0000ff0f, 0x00000000,
1358 0x28a4c, 0x07ffffff, 0x06000000,
1359 0x4d8, 0x00000fff, 0x00000100,
1360 0x3e78, 0x00000001, 0x00000002,
1361 0xc768, 0x00000008, 0x00000008,
1362 0x8c00, 0x000000ff, 0x00000003,
1363 0x214f8, 0x01ff01ff, 0x00000002,
1364 0x21498, 0x007ff800, 0x00200000,
1365 0x2015c, 0xffffffff, 0x00000f40,
1366 0x88c4, 0x001f3ae3, 0x00000082,
1367 0x88d4, 0x0000001f, 0x00000010,
1368 0x30934, 0xffffffff, 0x00000000
1369};
1370
1371static const u32 kalindi_mgcg_cgcg_init[] =
1372{
1373 0xc420, 0xffffffff, 0xfffffffc,
1374 0x30800, 0xffffffff, 0xe0000000,
1375 0x3c2a0, 0xffffffff, 0x00000100,
1376 0x3c208, 0xffffffff, 0x00000100,
1377 0x3c2c0, 0xffffffff, 0x00000100,
1378 0x3c2c8, 0xffffffff, 0x00000100,
1379 0x3c2c4, 0xffffffff, 0x00000100,
1380 0x55e4, 0xffffffff, 0x00600100,
1381 0x3c280, 0xffffffff, 0x00000100,
1382 0x3c214, 0xffffffff, 0x06000100,
1383 0x3c220, 0xffffffff, 0x00000100,
1384 0x3c218, 0xffffffff, 0x06000100,
1385 0x3c204, 0xffffffff, 0x00000100,
1386 0x3c2e0, 0xffffffff, 0x00000100,
1387 0x3c224, 0xffffffff, 0x00000100,
1388 0x3c200, 0xffffffff, 0x00000100,
1389 0x3c230, 0xffffffff, 0x00000100,
1390 0x3c234, 0xffffffff, 0x00000100,
1391 0x3c250, 0xffffffff, 0x00000100,
1392 0x3c254, 0xffffffff, 0x00000100,
1393 0x3c258, 0xffffffff, 0x00000100,
1394 0x3c25c, 0xffffffff, 0x00000100,
1395 0x3c260, 0xffffffff, 0x00000100,
1396 0x3c27c, 0xffffffff, 0x00000100,
1397 0x3c278, 0xffffffff, 0x00000100,
1398 0x3c210, 0xffffffff, 0x06000100,
1399 0x3c290, 0xffffffff, 0x00000100,
1400 0x3c274, 0xffffffff, 0x00000100,
1401 0x3c2b4, 0xffffffff, 0x00000100,
1402 0x3c2b0, 0xffffffff, 0x00000100,
1403 0x3c270, 0xffffffff, 0x00000100,
1404 0x30800, 0xffffffff, 0xe0000000,
1405 0x3c020, 0xffffffff, 0x00010000,
1406 0x3c024, 0xffffffff, 0x00030002,
1407 0x3c028, 0xffffffff, 0x00040007,
1408 0x3c02c, 0xffffffff, 0x00060005,
1409 0x3c030, 0xffffffff, 0x00090008,
1410 0x3c034, 0xffffffff, 0x00010000,
1411 0x3c038, 0xffffffff, 0x00030002,
1412 0x3c03c, 0xffffffff, 0x00040007,
1413 0x3c040, 0xffffffff, 0x00060005,
1414 0x3c044, 0xffffffff, 0x00090008,
1415 0x3c000, 0xffffffff, 0x96e00200,
1416 0x8708, 0xffffffff, 0x00900100,
1417 0xc424, 0xffffffff, 0x0020003f,
1418 0x38, 0xffffffff, 0x0140001c,
1419 0x3c, 0x000f0000, 0x000f0000,
1420 0x220, 0xffffffff, 0xC060000C,
1421 0x224, 0xc0000fff, 0x00000100,
1422 0x20a8, 0xffffffff, 0x00000104,
1423 0x55e4, 0xff000fff, 0x00000100,
1424 0x30cc, 0xc0000fff, 0x00000104,
1425 0xc1e4, 0x00000001, 0x00000001,
1426 0xd00c, 0xff000ff0, 0x00000100,
1427 0xd80c, 0xff000ff0, 0x00000100
1428};
1429
1430static const u32 hawaii_golden_spm_registers[] =
1431{
1432 0x30800, 0xe0ffffff, 0xe0000000
1433};
1434
1435static const u32 hawaii_golden_common_registers[] =
1436{
1437 0x30800, 0xffffffff, 0xe0000000,
1438 0x28350, 0xffffffff, 0x3a00161a,
1439 0x28354, 0xffffffff, 0x0000002e,
1440 0x9a10, 0xffffffff, 0x00018208,
1441 0x98f8, 0xffffffff, 0x12011003
1442};
1443
1444static const u32 hawaii_golden_registers[] =
1445{
1446 0x3354, 0x00000333, 0x00000333,
1447 0x9a10, 0x00010000, 0x00058208,
1448 0x9830, 0xffffffff, 0x00000000,
1449 0x9834, 0xf00fffff, 0x00000400,
1450 0x9838, 0x0002021c, 0x00020200,
1451 0xc78, 0x00000080, 0x00000000,
1452 0x5bb0, 0x000000f0, 0x00000070,
1453 0x5bc0, 0xf0311fff, 0x80300000,
1454 0x350c, 0x00810000, 0x408af000,
1455 0x7030, 0x31000111, 0x00000011,
1456 0x2f48, 0x73773777, 0x12010001,
1457 0x2120, 0x0000007f, 0x0000001b,
1458 0x21dc, 0x00007fb6, 0x00002191,
1459 0x3628, 0x0000003f, 0x0000000a,
1460 0x362c, 0x0000003f, 0x0000000a,
1461 0x2ae4, 0x00073ffe, 0x000022a2,
1462 0x240c, 0x000007ff, 0x00000000,
1463 0x8bf0, 0x00002001, 0x00000001,
1464 0x8b24, 0xffffffff, 0x00ffffff,
1465 0x30a04, 0x0000ff0f, 0x00000000,
1466 0x28a4c, 0x07ffffff, 0x06000000,
1467 0x3e78, 0x00000001, 0x00000002,
1468 0xc768, 0x00000008, 0x00000008,
1469 0xc770, 0x00000f00, 0x00000800,
1470 0xc774, 0x00000f00, 0x00000800,
1471 0xc798, 0x00ffffff, 0x00ff7fbf,
1472 0xc79c, 0x00ffffff, 0x00ff7faf,
1473 0x8c00, 0x000000ff, 0x00000800,
1474 0xe40, 0x00001fff, 0x00001fff,
1475 0x9060, 0x0000007f, 0x00000020,
1476 0x9508, 0x00010000, 0x00010000,
1477 0xae00, 0x00100000, 0x000ff07c,
1478 0xac14, 0x000003ff, 0x0000000f,
1479 0xac10, 0xffffffff, 0x7564fdec,
1480 0xac0c, 0xffffffff, 0x3120b9a8,
1481 0xac08, 0x20000000, 0x0f9c0000
1482};
1483
1484static const u32 hawaii_mgcg_cgcg_init[] =
1485{
1486 0xc420, 0xffffffff, 0xfffffffd,
1487 0x30800, 0xffffffff, 0xe0000000,
1488 0x3c2a0, 0xffffffff, 0x00000100,
1489 0x3c208, 0xffffffff, 0x00000100,
1490 0x3c2c0, 0xffffffff, 0x00000100,
1491 0x3c2c8, 0xffffffff, 0x00000100,
1492 0x3c2c4, 0xffffffff, 0x00000100,
1493 0x55e4, 0xffffffff, 0x00200100,
1494 0x3c280, 0xffffffff, 0x00000100,
1495 0x3c214, 0xffffffff, 0x06000100,
1496 0x3c220, 0xffffffff, 0x00000100,
1497 0x3c218, 0xffffffff, 0x06000100,
1498 0x3c204, 0xffffffff, 0x00000100,
1499 0x3c2e0, 0xffffffff, 0x00000100,
1500 0x3c224, 0xffffffff, 0x00000100,
1501 0x3c200, 0xffffffff, 0x00000100,
1502 0x3c230, 0xffffffff, 0x00000100,
1503 0x3c234, 0xffffffff, 0x00000100,
1504 0x3c250, 0xffffffff, 0x00000100,
1505 0x3c254, 0xffffffff, 0x00000100,
1506 0x3c258, 0xffffffff, 0x00000100,
1507 0x3c25c, 0xffffffff, 0x00000100,
1508 0x3c260, 0xffffffff, 0x00000100,
1509 0x3c27c, 0xffffffff, 0x00000100,
1510 0x3c278, 0xffffffff, 0x00000100,
1511 0x3c210, 0xffffffff, 0x06000100,
1512 0x3c290, 0xffffffff, 0x00000100,
1513 0x3c274, 0xffffffff, 0x00000100,
1514 0x3c2b4, 0xffffffff, 0x00000100,
1515 0x3c2b0, 0xffffffff, 0x00000100,
1516 0x3c270, 0xffffffff, 0x00000100,
1517 0x30800, 0xffffffff, 0xe0000000,
1518 0x3c020, 0xffffffff, 0x00010000,
1519 0x3c024, 0xffffffff, 0x00030002,
1520 0x3c028, 0xffffffff, 0x00040007,
1521 0x3c02c, 0xffffffff, 0x00060005,
1522 0x3c030, 0xffffffff, 0x00090008,
1523 0x3c034, 0xffffffff, 0x00010000,
1524 0x3c038, 0xffffffff, 0x00030002,
1525 0x3c03c, 0xffffffff, 0x00040007,
1526 0x3c040, 0xffffffff, 0x00060005,
1527 0x3c044, 0xffffffff, 0x00090008,
1528 0x3c048, 0xffffffff, 0x00010000,
1529 0x3c04c, 0xffffffff, 0x00030002,
1530 0x3c050, 0xffffffff, 0x00040007,
1531 0x3c054, 0xffffffff, 0x00060005,
1532 0x3c058, 0xffffffff, 0x00090008,
1533 0x3c05c, 0xffffffff, 0x00010000,
1534 0x3c060, 0xffffffff, 0x00030002,
1535 0x3c064, 0xffffffff, 0x00040007,
1536 0x3c068, 0xffffffff, 0x00060005,
1537 0x3c06c, 0xffffffff, 0x00090008,
1538 0x3c070, 0xffffffff, 0x00010000,
1539 0x3c074, 0xffffffff, 0x00030002,
1540 0x3c078, 0xffffffff, 0x00040007,
1541 0x3c07c, 0xffffffff, 0x00060005,
1542 0x3c080, 0xffffffff, 0x00090008,
1543 0x3c084, 0xffffffff, 0x00010000,
1544 0x3c088, 0xffffffff, 0x00030002,
1545 0x3c08c, 0xffffffff, 0x00040007,
1546 0x3c090, 0xffffffff, 0x00060005,
1547 0x3c094, 0xffffffff, 0x00090008,
1548 0x3c098, 0xffffffff, 0x00010000,
1549 0x3c09c, 0xffffffff, 0x00030002,
1550 0x3c0a0, 0xffffffff, 0x00040007,
1551 0x3c0a4, 0xffffffff, 0x00060005,
1552 0x3c0a8, 0xffffffff, 0x00090008,
1553 0x3c0ac, 0xffffffff, 0x00010000,
1554 0x3c0b0, 0xffffffff, 0x00030002,
1555 0x3c0b4, 0xffffffff, 0x00040007,
1556 0x3c0b8, 0xffffffff, 0x00060005,
1557 0x3c0bc, 0xffffffff, 0x00090008,
1558 0x3c0c0, 0xffffffff, 0x00010000,
1559 0x3c0c4, 0xffffffff, 0x00030002,
1560 0x3c0c8, 0xffffffff, 0x00040007,
1561 0x3c0cc, 0xffffffff, 0x00060005,
1562 0x3c0d0, 0xffffffff, 0x00090008,
1563 0x3c0d4, 0xffffffff, 0x00010000,
1564 0x3c0d8, 0xffffffff, 0x00030002,
1565 0x3c0dc, 0xffffffff, 0x00040007,
1566 0x3c0e0, 0xffffffff, 0x00060005,
1567 0x3c0e4, 0xffffffff, 0x00090008,
1568 0x3c0e8, 0xffffffff, 0x00010000,
1569 0x3c0ec, 0xffffffff, 0x00030002,
1570 0x3c0f0, 0xffffffff, 0x00040007,
1571 0x3c0f4, 0xffffffff, 0x00060005,
1572 0x3c0f8, 0xffffffff, 0x00090008,
1573 0xc318, 0xffffffff, 0x00020200,
1574 0x3350, 0xffffffff, 0x00000200,
1575 0x15c0, 0xffffffff, 0x00000400,
1576 0x55e8, 0xffffffff, 0x00000000,
1577 0x2f50, 0xffffffff, 0x00000902,
1578 0x3c000, 0xffffffff, 0x96940200,
1579 0x8708, 0xffffffff, 0x00900100,
1580 0xc424, 0xffffffff, 0x0020003f,
1581 0x38, 0xffffffff, 0x0140001c,
1582 0x3c, 0x000f0000, 0x000f0000,
1583 0x220, 0xffffffff, 0xc060000c,
1584 0x224, 0xc0000fff, 0x00000100,
1585 0xf90, 0xffffffff, 0x00000100,
1586 0xf98, 0x00000101, 0x00000000,
1587 0x20a8, 0xffffffff, 0x00000104,
1588 0x55e4, 0xff000fff, 0x00000100,
1589 0x30cc, 0xc0000fff, 0x00000104,
1590 0xc1e4, 0x00000001, 0x00000001,
1591 0xd00c, 0xff000ff0, 0x00000100,
1592 0xd80c, 0xff000ff0, 0x00000100
1593};
1594
1595static const u32 godavari_golden_registers[] =
1596{
1597 0x55e4, 0xff607fff, 0xfc000100,
1598 0x6ed8, 0x00010101, 0x00010000,
1599 0x9830, 0xffffffff, 0x00000000,
1600 0x98302, 0xf00fffff, 0x00000400,
1601 0x6130, 0xffffffff, 0x00010000,
1602 0x5bb0, 0x000000f0, 0x00000070,
1603 0x5bc0, 0xf0311fff, 0x80300000,
1604 0x98f8, 0x73773777, 0x12010001,
1605 0x98fc, 0xffffffff, 0x00000010,
1606 0x8030, 0x00001f0f, 0x0000100a,
1607 0x2f48, 0x73773777, 0x12010001,
1608 0x2408, 0x000fffff, 0x000c007f,
1609 0x8a14, 0xf000003f, 0x00000007,
1610 0x8b24, 0xffffffff, 0x00ff0fff,
1611 0x30a04, 0x0000ff0f, 0x00000000,
1612 0x28a4c, 0x07ffffff, 0x06000000,
1613 0x4d8, 0x00000fff, 0x00000100,
1614 0xd014, 0x00010000, 0x00810001,
1615 0xd814, 0x00010000, 0x00810001,
1616 0x3e78, 0x00000001, 0x00000002,
1617 0xc768, 0x00000008, 0x00000008,
1618 0xc770, 0x00000f00, 0x00000800,
1619 0xc774, 0x00000f00, 0x00000800,
1620 0xc798, 0x00ffffff, 0x00ff7fbf,
1621 0xc79c, 0x00ffffff, 0x00ff7faf,
1622 0x8c00, 0x000000ff, 0x00000001,
1623 0x214f8, 0x01ff01ff, 0x00000002,
1624 0x21498, 0x007ff800, 0x00200000,
1625 0x2015c, 0xffffffff, 0x00000f40,
1626 0x88c4, 0x001f3ae3, 0x00000082,
1627 0x88d4, 0x0000001f, 0x00000010,
1628 0x30934, 0xffffffff, 0x00000000
1629};
1630
1631
1632static void cik_init_golden_registers(struct radeon_device *rdev)
1633{
1634 switch (rdev->family) {
1635 case CHIP_BONAIRE:
1636 radeon_program_register_sequence(rdev,
1637 bonaire_mgcg_cgcg_init,
1638 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1639 radeon_program_register_sequence(rdev,
1640 bonaire_golden_registers,
1641 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1642 radeon_program_register_sequence(rdev,
1643 bonaire_golden_common_registers,
1644 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1645 radeon_program_register_sequence(rdev,
1646 bonaire_golden_spm_registers,
1647 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1648 break;
1649 case CHIP_KABINI:
1650 radeon_program_register_sequence(rdev,
1651 kalindi_mgcg_cgcg_init,
1652 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1653 radeon_program_register_sequence(rdev,
1654 kalindi_golden_registers,
1655 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1656 radeon_program_register_sequence(rdev,
1657 kalindi_golden_common_registers,
1658 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1659 radeon_program_register_sequence(rdev,
1660 kalindi_golden_spm_registers,
1661 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1662 break;
1663 case CHIP_MULLINS:
1664 radeon_program_register_sequence(rdev,
1665 kalindi_mgcg_cgcg_init,
1666 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1667 radeon_program_register_sequence(rdev,
1668 godavari_golden_registers,
1669 (const u32)ARRAY_SIZE(godavari_golden_registers));
1670 radeon_program_register_sequence(rdev,
1671 kalindi_golden_common_registers,
1672 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1673 radeon_program_register_sequence(rdev,
1674 kalindi_golden_spm_registers,
1675 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1676 break;
1677 case CHIP_KAVERI:
1678 radeon_program_register_sequence(rdev,
1679 spectre_mgcg_cgcg_init,
1680 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1681 radeon_program_register_sequence(rdev,
1682 spectre_golden_registers,
1683 (const u32)ARRAY_SIZE(spectre_golden_registers));
1684 radeon_program_register_sequence(rdev,
1685 spectre_golden_common_registers,
1686 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1687 radeon_program_register_sequence(rdev,
1688 spectre_golden_spm_registers,
1689 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1690 break;
1691 case CHIP_HAWAII:
1692 radeon_program_register_sequence(rdev,
1693 hawaii_mgcg_cgcg_init,
1694 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1695 radeon_program_register_sequence(rdev,
1696 hawaii_golden_registers,
1697 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1698 radeon_program_register_sequence(rdev,
1699 hawaii_golden_common_registers,
1700 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1701 radeon_program_register_sequence(rdev,
1702 hawaii_golden_spm_registers,
1703 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1704 break;
1705 default:
1706 break;
1707 }
1708}
1709
1710/**
1711 * cik_get_xclk - get the xclk
1712 *
1713 * @rdev: radeon_device pointer
1714 *
1715 * Returns the reference clock used by the gfx engine
1716 * (CIK).
1717 */
1718u32 cik_get_xclk(struct radeon_device *rdev)
1719{
1720 u32 reference_clock = rdev->clock.spll.reference_freq;
1721
1722 if (rdev->flags & RADEON_IS_IGP) {
1723 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1724 return reference_clock / 2;
1725 } else {
1726 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1727 return reference_clock / 4;
1728 }
1729 return reference_clock;
1730}
1731
1732/**
1733 * cik_mm_rdoorbell - read a doorbell dword
1734 *
1735 * @rdev: radeon_device pointer
1736 * @index: doorbell index
1737 *
1738 * Returns the value in the doorbell aperture at the
1739 * requested doorbell index (CIK).
1740 */
1741u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1742{
1743 if (index < rdev->doorbell.num_doorbells) {
1744 return readl(rdev->doorbell.ptr + index);
1745 } else {
1746 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1747 return 0;
1748 }
1749}
1750
1751/**
1752 * cik_mm_wdoorbell - write a doorbell dword
1753 *
1754 * @rdev: radeon_device pointer
1755 * @index: doorbell index
1756 * @v: value to write
1757 *
1758 * Writes @v to the doorbell aperture at the
1759 * requested doorbell index (CIK).
1760 */
1761void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1762{
1763 if (index < rdev->doorbell.num_doorbells) {
1764 writel(v, rdev->doorbell.ptr + index);
1765 } else {
1766 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1767 }
1768}
1769
1770#define BONAIRE_IO_MC_REGS_SIZE 36
1771
1772static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1773{
1774 {0x00000070, 0x04400000},
1775 {0x00000071, 0x80c01803},
1776 {0x00000072, 0x00004004},
1777 {0x00000073, 0x00000100},
1778 {0x00000074, 0x00ff0000},
1779 {0x00000075, 0x34000000},
1780 {0x00000076, 0x08000014},
1781 {0x00000077, 0x00cc08ec},
1782 {0x00000078, 0x00000400},
1783 {0x00000079, 0x00000000},
1784 {0x0000007a, 0x04090000},
1785 {0x0000007c, 0x00000000},
1786 {0x0000007e, 0x4408a8e8},
1787 {0x0000007f, 0x00000304},
1788 {0x00000080, 0x00000000},
1789 {0x00000082, 0x00000001},
1790 {0x00000083, 0x00000002},
1791 {0x00000084, 0xf3e4f400},
1792 {0x00000085, 0x052024e3},
1793 {0x00000087, 0x00000000},
1794 {0x00000088, 0x01000000},
1795 {0x0000008a, 0x1c0a0000},
1796 {0x0000008b, 0xff010000},
1797 {0x0000008d, 0xffffefff},
1798 {0x0000008e, 0xfff3efff},
1799 {0x0000008f, 0xfff3efbf},
1800 {0x00000092, 0xf7ffffff},
1801 {0x00000093, 0xffffff7f},
1802 {0x00000095, 0x00101101},
1803 {0x00000096, 0x00000fff},
1804 {0x00000097, 0x00116fff},
1805 {0x00000098, 0x60010000},
1806 {0x00000099, 0x10010000},
1807 {0x0000009a, 0x00006000},
1808 {0x0000009b, 0x00001000},
1809 {0x0000009f, 0x00b48000}
1810};
1811
1812#define HAWAII_IO_MC_REGS_SIZE 22
1813
1814static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1815{
1816 {0x0000007d, 0x40000000},
1817 {0x0000007e, 0x40180304},
1818 {0x0000007f, 0x0000ff00},
1819 {0x00000081, 0x00000000},
1820 {0x00000083, 0x00000800},
1821 {0x00000086, 0x00000000},
1822 {0x00000087, 0x00000100},
1823 {0x00000088, 0x00020100},
1824 {0x00000089, 0x00000000},
1825 {0x0000008b, 0x00040000},
1826 {0x0000008c, 0x00000100},
1827 {0x0000008e, 0xff010000},
1828 {0x00000090, 0xffffefff},
1829 {0x00000091, 0xfff3efff},
1830 {0x00000092, 0xfff3efbf},
1831 {0x00000093, 0xf7ffffff},
1832 {0x00000094, 0xffffff7f},
1833 {0x00000095, 0x00000fff},
1834 {0x00000096, 0x00116fff},
1835 {0x00000097, 0x60010000},
1836 {0x00000098, 0x10010000},
1837 {0x0000009f, 0x00c79000}
1838};
1839
1840
1841/**
1842 * cik_srbm_select - select specific register instances
1843 *
1844 * @rdev: radeon_device pointer
1845 * @me: selected ME (micro engine)
1846 * @pipe: pipe
1847 * @queue: queue
1848 * @vmid: VMID
1849 *
1850 * Switches the currently active registers instances. Some
1851 * registers are instanced per VMID, others are instanced per
1852 * me/pipe/queue combination.
1853 */
1854static void cik_srbm_select(struct radeon_device *rdev,
1855 u32 me, u32 pipe, u32 queue, u32 vmid)
1856{
1857 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1858 MEID(me & 0x3) |
1859 VMID(vmid & 0xf) |
1860 QUEUEID(queue & 0x7));
1861 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1862}
1863
1864/* ucode loading */
1865/**
1866 * ci_mc_load_microcode - load MC ucode into the hw
1867 *
1868 * @rdev: radeon_device pointer
1869 *
1870 * Load the GDDR MC ucode into the hw (CIK).
1871 * Returns 0 on success, error on failure.
1872 */
1873int ci_mc_load_microcode(struct radeon_device *rdev)
1874{
1875 const __be32 *fw_data = NULL;
1876 const __le32 *new_fw_data = NULL;
1877 u32 running, tmp;
1878 u32 *io_mc_regs = NULL;
1879 const __le32 *new_io_mc_regs = NULL;
1880 int i, regs_size, ucode_size;
1881
1882 if (!rdev->mc_fw)
1883 return -EINVAL;
1884
1885 if (rdev->new_fw) {
1886 const struct mc_firmware_header_v1_0 *hdr =
1887 (const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1888
1889 radeon_ucode_print_mc_hdr(&hdr->header);
1890
1891 regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1892 new_io_mc_regs = (const __le32 *)
1893 (rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1894 ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1895 new_fw_data = (const __le32 *)
1896 (rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1897 } else {
1898 ucode_size = rdev->mc_fw->size / 4;
1899
1900 switch (rdev->family) {
1901 case CHIP_BONAIRE:
1902 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1903 regs_size = BONAIRE_IO_MC_REGS_SIZE;
1904 break;
1905 case CHIP_HAWAII:
1906 io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1907 regs_size = HAWAII_IO_MC_REGS_SIZE;
1908 break;
1909 default:
1910 return -EINVAL;
1911 }
1912 fw_data = (const __be32 *)rdev->mc_fw->data;
1913 }
1914
1915 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1916
1917 if (running == 0) {
1918 /* reset the engine and set to writable */
1919 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921
1922 /* load mc io regs */
1923 for (i = 0; i < regs_size; i++) {
1924 if (rdev->new_fw) {
1925 WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 } else {
1928 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 }
1931 }
1932
1933 tmp = RREG32(MC_SEQ_MISC0);
1934 if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 }
1940
1941 /* load the MC ucode */
1942 for (i = 0; i < ucode_size; i++) {
1943 if (rdev->new_fw)
1944 WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 else
1946 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 }
1948
1949 /* put the engine back into the active state */
1950 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953
1954 /* wait for training to complete */
1955 for (i = 0; i < rdev->usec_timeout; i++) {
1956 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 break;
1958 udelay(1);
1959 }
1960 for (i = 0; i < rdev->usec_timeout; i++) {
1961 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 break;
1963 udelay(1);
1964 }
1965 }
1966
1967 return 0;
1968}
1969
1970/**
1971 * cik_init_microcode - load ucode images from disk
1972 *
1973 * @rdev: radeon_device pointer
1974 *
1975 * Use the firmware interface to load the ucode images into
1976 * the driver (not loaded into hw).
1977 * Returns 0 on success, error on failure.
1978 */
1979static int cik_init_microcode(struct radeon_device *rdev)
1980{
1981 const char *chip_name;
1982 const char *new_chip_name;
1983 size_t pfp_req_size, me_req_size, ce_req_size,
1984 mec_req_size, rlc_req_size, mc_req_size = 0,
1985 sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1986 char fw_name[30];
1987 int new_fw = 0;
1988 int err;
1989 int num_fw;
1990 bool new_smc = false;
1991
1992 DRM_DEBUG("\n");
1993
1994 switch (rdev->family) {
1995 case CHIP_BONAIRE:
1996 chip_name = "BONAIRE";
1997 if ((rdev->pdev->revision == 0x80) ||
1998 (rdev->pdev->revision == 0x81) ||
1999 (rdev->pdev->device == 0x665f))
2000 new_smc = true;
2001 new_chip_name = "bonaire";
2002 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2003 me_req_size = CIK_ME_UCODE_SIZE * 4;
2004 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2005 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2006 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2007 mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2008 mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2009 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2010 smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2011 num_fw = 8;
2012 break;
2013 case CHIP_HAWAII:
2014 chip_name = "HAWAII";
2015 if (rdev->pdev->revision == 0x80)
2016 new_smc = true;
2017 new_chip_name = "hawaii";
2018 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2019 me_req_size = CIK_ME_UCODE_SIZE * 4;
2020 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2021 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2022 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2023 mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2024 mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2025 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2026 smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2027 num_fw = 8;
2028 break;
2029 case CHIP_KAVERI:
2030 chip_name = "KAVERI";
2031 new_chip_name = "kaveri";
2032 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2033 me_req_size = CIK_ME_UCODE_SIZE * 4;
2034 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2035 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2036 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2037 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2038 num_fw = 7;
2039 break;
2040 case CHIP_KABINI:
2041 chip_name = "KABINI";
2042 new_chip_name = "kabini";
2043 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2044 me_req_size = CIK_ME_UCODE_SIZE * 4;
2045 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2046 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2047 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2048 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2049 num_fw = 6;
2050 break;
2051 case CHIP_MULLINS:
2052 chip_name = "MULLINS";
2053 new_chip_name = "mullins";
2054 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2055 me_req_size = CIK_ME_UCODE_SIZE * 4;
2056 ce_req_size = CIK_CE_UCODE_SIZE * 4;
2057 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2058 rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2059 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2060 num_fw = 6;
2061 break;
2062 default: BUG();
2063 }
2064
2065 DRM_INFO("Loading %s Microcode\n", new_chip_name);
2066
2067 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2068 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2069 if (err) {
2070 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2071 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2072 if (err)
2073 goto out;
2074 if (rdev->pfp_fw->size != pfp_req_size) {
2075 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2076 rdev->pfp_fw->size, fw_name);
2077 err = -EINVAL;
2078 goto out;
2079 }
2080 } else {
2081 err = radeon_ucode_validate(rdev->pfp_fw);
2082 if (err) {
2083 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2084 fw_name);
2085 goto out;
2086 } else {
2087 new_fw++;
2088 }
2089 }
2090
2091 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2092 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2093 if (err) {
2094 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2095 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2096 if (err)
2097 goto out;
2098 if (rdev->me_fw->size != me_req_size) {
2099 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2100 rdev->me_fw->size, fw_name);
2101 err = -EINVAL;
2102 }
2103 } else {
2104 err = radeon_ucode_validate(rdev->me_fw);
2105 if (err) {
2106 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2107 fw_name);
2108 goto out;
2109 } else {
2110 new_fw++;
2111 }
2112 }
2113
2114 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 if (err) {
2117 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 if (err)
2120 goto out;
2121 if (rdev->ce_fw->size != ce_req_size) {
2122 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2123 rdev->ce_fw->size, fw_name);
2124 err = -EINVAL;
2125 }
2126 } else {
2127 err = radeon_ucode_validate(rdev->ce_fw);
2128 if (err) {
2129 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2130 fw_name);
2131 goto out;
2132 } else {
2133 new_fw++;
2134 }
2135 }
2136
2137 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2138 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2139 if (err) {
2140 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2141 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2142 if (err)
2143 goto out;
2144 if (rdev->mec_fw->size != mec_req_size) {
2145 pr_err("cik_cp: Bogus length %zu in firmware \"%s\"\n",
2146 rdev->mec_fw->size, fw_name);
2147 err = -EINVAL;
2148 }
2149 } else {
2150 err = radeon_ucode_validate(rdev->mec_fw);
2151 if (err) {
2152 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2153 fw_name);
2154 goto out;
2155 } else {
2156 new_fw++;
2157 }
2158 }
2159
2160 if (rdev->family == CHIP_KAVERI) {
2161 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2162 err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2163 if (err) {
2164 goto out;
2165 } else {
2166 err = radeon_ucode_validate(rdev->mec2_fw);
2167 if (err) {
2168 goto out;
2169 } else {
2170 new_fw++;
2171 }
2172 }
2173 }
2174
2175 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2176 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2177 if (err) {
2178 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2179 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2180 if (err)
2181 goto out;
2182 if (rdev->rlc_fw->size != rlc_req_size) {
2183 pr_err("cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2184 rdev->rlc_fw->size, fw_name);
2185 err = -EINVAL;
2186 }
2187 } else {
2188 err = radeon_ucode_validate(rdev->rlc_fw);
2189 if (err) {
2190 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2191 fw_name);
2192 goto out;
2193 } else {
2194 new_fw++;
2195 }
2196 }
2197
2198 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2199 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2200 if (err) {
2201 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2202 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2203 if (err)
2204 goto out;
2205 if (rdev->sdma_fw->size != sdma_req_size) {
2206 pr_err("cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2207 rdev->sdma_fw->size, fw_name);
2208 err = -EINVAL;
2209 }
2210 } else {
2211 err = radeon_ucode_validate(rdev->sdma_fw);
2212 if (err) {
2213 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2214 fw_name);
2215 goto out;
2216 } else {
2217 new_fw++;
2218 }
2219 }
2220
2221 /* No SMC, MC ucode on APUs */
2222 if (!(rdev->flags & RADEON_IS_IGP)) {
2223 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2224 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2225 if (err) {
2226 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2227 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2228 if (err) {
2229 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2230 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2231 if (err)
2232 goto out;
2233 }
2234 if ((rdev->mc_fw->size != mc_req_size) &&
2235 (rdev->mc_fw->size != mc2_req_size)){
2236 pr_err("cik_mc: Bogus length %zu in firmware \"%s\"\n",
2237 rdev->mc_fw->size, fw_name);
2238 err = -EINVAL;
2239 }
2240 DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2241 } else {
2242 err = radeon_ucode_validate(rdev->mc_fw);
2243 if (err) {
2244 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2245 fw_name);
2246 goto out;
2247 } else {
2248 new_fw++;
2249 }
2250 }
2251
2252 if (new_smc)
2253 snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
2254 else
2255 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2256 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2257 if (err) {
2258 snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2259 err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2260 if (err) {
2261 pr_err("smc: error loading firmware \"%s\"\n",
2262 fw_name);
2263 release_firmware(rdev->smc_fw);
2264 rdev->smc_fw = NULL;
2265 err = 0;
2266 } else if (rdev->smc_fw->size != smc_req_size) {
2267 pr_err("cik_smc: Bogus length %zu in firmware \"%s\"\n",
2268 rdev->smc_fw->size, fw_name);
2269 err = -EINVAL;
2270 }
2271 } else {
2272 err = radeon_ucode_validate(rdev->smc_fw);
2273 if (err) {
2274 pr_err("cik_fw: validation failed for firmware \"%s\"\n",
2275 fw_name);
2276 goto out;
2277 } else {
2278 new_fw++;
2279 }
2280 }
2281 }
2282
2283 if (new_fw == 0) {
2284 rdev->new_fw = false;
2285 } else if (new_fw < num_fw) {
2286 pr_err("ci_fw: mixing new and old firmware!\n");
2287 err = -EINVAL;
2288 } else {
2289 rdev->new_fw = true;
2290 }
2291
2292out:
2293 if (err) {
2294 if (err != -EINVAL)
2295 pr_err("cik_cp: Failed to load firmware \"%s\"\n",
2296 fw_name);
2297 release_firmware(rdev->pfp_fw);
2298 rdev->pfp_fw = NULL;
2299 release_firmware(rdev->me_fw);
2300 rdev->me_fw = NULL;
2301 release_firmware(rdev->ce_fw);
2302 rdev->ce_fw = NULL;
2303 release_firmware(rdev->mec_fw);
2304 rdev->mec_fw = NULL;
2305 release_firmware(rdev->mec2_fw);
2306 rdev->mec2_fw = NULL;
2307 release_firmware(rdev->rlc_fw);
2308 rdev->rlc_fw = NULL;
2309 release_firmware(rdev->sdma_fw);
2310 rdev->sdma_fw = NULL;
2311 release_firmware(rdev->mc_fw);
2312 rdev->mc_fw = NULL;
2313 release_firmware(rdev->smc_fw);
2314 rdev->smc_fw = NULL;
2315 }
2316 return err;
2317}
2318
2319/*
2320 * Core functions
2321 */
2322/**
2323 * cik_tiling_mode_table_init - init the hw tiling table
2324 *
2325 * @rdev: radeon_device pointer
2326 *
2327 * Starting with SI, the tiling setup is done globally in a
2328 * set of 32 tiling modes. Rather than selecting each set of
2329 * parameters per surface as on older asics, we just select
2330 * which index in the tiling table we want to use, and the
2331 * surface uses those parameters (CIK).
2332 */
2333static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2334{
2335 u32 *tile = rdev->config.cik.tile_mode_array;
2336 u32 *macrotile = rdev->config.cik.macrotile_mode_array;
2337 const u32 num_tile_mode_states =
2338 ARRAY_SIZE(rdev->config.cik.tile_mode_array);
2339 const u32 num_secondary_tile_mode_states =
2340 ARRAY_SIZE(rdev->config.cik.macrotile_mode_array);
2341 u32 reg_offset, split_equal_to_row_size;
2342 u32 num_pipe_configs;
2343 u32 num_rbs = rdev->config.cik.max_backends_per_se *
2344 rdev->config.cik.max_shader_engines;
2345
2346 switch (rdev->config.cik.mem_row_size_in_kb) {
2347 case 1:
2348 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2349 break;
2350 case 2:
2351 default:
2352 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2353 break;
2354 case 4:
2355 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2356 break;
2357 }
2358
2359 num_pipe_configs = rdev->config.cik.max_tile_pipes;
2360 if (num_pipe_configs > 8)
2361 num_pipe_configs = 16;
2362
2363 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2364 tile[reg_offset] = 0;
2365 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2366 macrotile[reg_offset] = 0;
2367
2368 switch(num_pipe_configs) {
2369 case 16:
2370 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2372 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2373 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2374 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2378 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2381 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2382 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2384 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2386 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 TILE_SPLIT(split_equal_to_row_size));
2390 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2393 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2396 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2397 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2400 TILE_SPLIT(split_equal_to_row_size));
2401 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2403 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2406 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2410 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2414 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2421 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2436 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2442 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2444 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2445 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2446 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448
2449 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 NUM_BANKS(ADDR_SURF_16_BANK));
2453 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 NUM_BANKS(ADDR_SURF_16_BANK));
2457 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 NUM_BANKS(ADDR_SURF_16_BANK));
2461 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 NUM_BANKS(ADDR_SURF_16_BANK));
2465 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 NUM_BANKS(ADDR_SURF_8_BANK));
2469 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 NUM_BANKS(ADDR_SURF_4_BANK));
2473 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 NUM_BANKS(ADDR_SURF_2_BANK));
2477 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 NUM_BANKS(ADDR_SURF_16_BANK));
2481 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 NUM_BANKS(ADDR_SURF_16_BANK));
2485 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 NUM_BANKS(ADDR_SURF_16_BANK));
2489 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 NUM_BANKS(ADDR_SURF_8_BANK));
2493 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 NUM_BANKS(ADDR_SURF_4_BANK));
2497 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2500 NUM_BANKS(ADDR_SURF_2_BANK));
2501 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 NUM_BANKS(ADDR_SURF_2_BANK));
2505
2506 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2508 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2509 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2510 break;
2511
2512 case 8:
2513 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2517 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2521 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2523 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2524 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2525 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2529 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2531 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2532 TILE_SPLIT(split_equal_to_row_size));
2533 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 TILE_SPLIT(split_equal_to_row_size));
2544 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2549 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2558 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2559 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2564 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2568 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2572 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2574 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2576 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2577 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2578 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2579 tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2581 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2583 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2585 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2587 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2588 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2589 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2591
2592 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2593 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2594 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2595 NUM_BANKS(ADDR_SURF_16_BANK));
2596 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2599 NUM_BANKS(ADDR_SURF_16_BANK));
2600 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2602 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2603 NUM_BANKS(ADDR_SURF_16_BANK));
2604 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2605 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2606 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2607 NUM_BANKS(ADDR_SURF_16_BANK));
2608 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2609 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2610 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2611 NUM_BANKS(ADDR_SURF_8_BANK));
2612 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2613 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2614 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2615 NUM_BANKS(ADDR_SURF_4_BANK));
2616 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2617 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2618 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2619 NUM_BANKS(ADDR_SURF_2_BANK));
2620 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2623 NUM_BANKS(ADDR_SURF_16_BANK));
2624 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2625 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2626 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2627 NUM_BANKS(ADDR_SURF_16_BANK));
2628 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2629 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2630 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2631 NUM_BANKS(ADDR_SURF_16_BANK));
2632 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2635 NUM_BANKS(ADDR_SURF_16_BANK));
2636 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2639 NUM_BANKS(ADDR_SURF_8_BANK));
2640 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2642 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2643 NUM_BANKS(ADDR_SURF_4_BANK));
2644 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2647 NUM_BANKS(ADDR_SURF_2_BANK));
2648
2649 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2650 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2651 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2652 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2653 break;
2654
2655 case 4:
2656 if (num_rbs == 4) {
2657 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2658 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2661 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2662 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2663 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2664 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2665 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2666 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2668 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2669 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2671 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2672 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2673 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2674 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2675 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2676 TILE_SPLIT(split_equal_to_row_size));
2677 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2678 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2679 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2680 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2681 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2682 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2683 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2684 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2685 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2687 TILE_SPLIT(split_equal_to_row_size));
2688 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2689 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2690 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2693 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2694 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2697 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2698 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2699 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2701 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2702 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2705 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2708 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2712 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2713 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2714 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2716 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2717 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2718 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2721 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2723 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2727 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2729 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2731 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2733 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2735
2736 } else if (num_rbs < 4) {
2737 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2739 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2740 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2741 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2743 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2744 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2745 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2747 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2749 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2753 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2754 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2755 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2756 TILE_SPLIT(split_equal_to_row_size));
2757 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2758 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2760 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2761 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2762 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2763 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2764 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2765 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2766 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2767 TILE_SPLIT(split_equal_to_row_size));
2768 tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2769 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2770 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2771 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2773 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2774 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2775 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2777 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2778 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2779 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2781 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2782 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2783 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2785 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2788 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2790 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2792 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2794 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2796 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2799 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2800 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2801 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2802 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2803 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2807 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 }
2816
2817 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2819 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2820 NUM_BANKS(ADDR_SURF_16_BANK));
2821 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2824 NUM_BANKS(ADDR_SURF_16_BANK));
2825 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2826 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2827 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2828 NUM_BANKS(ADDR_SURF_16_BANK));
2829 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2831 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2832 NUM_BANKS(ADDR_SURF_16_BANK));
2833 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2835 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2836 NUM_BANKS(ADDR_SURF_16_BANK));
2837 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2840 NUM_BANKS(ADDR_SURF_8_BANK));
2841 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2844 NUM_BANKS(ADDR_SURF_4_BANK));
2845 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2846 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2847 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2848 NUM_BANKS(ADDR_SURF_16_BANK));
2849 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2853 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2855 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2856 NUM_BANKS(ADDR_SURF_16_BANK));
2857 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2860 NUM_BANKS(ADDR_SURF_16_BANK));
2861 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2864 NUM_BANKS(ADDR_SURF_16_BANK));
2865 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2868 NUM_BANKS(ADDR_SURF_8_BANK));
2869 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2872 NUM_BANKS(ADDR_SURF_4_BANK));
2873
2874 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2875 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2876 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2877 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
2878 break;
2879
2880 case 2:
2881 tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2883 PIPE_CONFIG(ADDR_SURF_P2) |
2884 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2885 tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2887 PIPE_CONFIG(ADDR_SURF_P2) |
2888 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2889 tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2891 PIPE_CONFIG(ADDR_SURF_P2) |
2892 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2893 tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2895 PIPE_CONFIG(ADDR_SURF_P2) |
2896 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2897 tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2899 PIPE_CONFIG(ADDR_SURF_P2) |
2900 TILE_SPLIT(split_equal_to_row_size));
2901 tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2902 PIPE_CONFIG(ADDR_SURF_P2) |
2903 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2904 tile[6] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2906 PIPE_CONFIG(ADDR_SURF_P2) |
2907 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2908 tile[7] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2910 PIPE_CONFIG(ADDR_SURF_P2) |
2911 TILE_SPLIT(split_equal_to_row_size));
2912 tile[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2913 PIPE_CONFIG(ADDR_SURF_P2);
2914 tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2915 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2916 PIPE_CONFIG(ADDR_SURF_P2));
2917 tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2919 PIPE_CONFIG(ADDR_SURF_P2) |
2920 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2921 tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2923 PIPE_CONFIG(ADDR_SURF_P2) |
2924 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2925 tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2927 PIPE_CONFIG(ADDR_SURF_P2) |
2928 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2929 tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 PIPE_CONFIG(ADDR_SURF_P2) |
2931 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2932 tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2933 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2934 PIPE_CONFIG(ADDR_SURF_P2) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2937 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2938 PIPE_CONFIG(ADDR_SURF_P2) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 tile[17] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2941 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2942 PIPE_CONFIG(ADDR_SURF_P2) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2945 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2946 PIPE_CONFIG(ADDR_SURF_P2));
2947 tile[28] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2948 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2949 PIPE_CONFIG(ADDR_SURF_P2) |
2950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2951 tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2952 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2953 PIPE_CONFIG(ADDR_SURF_P2) |
2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2955 tile[30] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2956 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2957 PIPE_CONFIG(ADDR_SURF_P2) |
2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2959
2960 macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2964 macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2967 NUM_BANKS(ADDR_SURF_16_BANK));
2968 macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 NUM_BANKS(ADDR_SURF_16_BANK));
2972 macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2975 NUM_BANKS(ADDR_SURF_16_BANK));
2976 macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2979 NUM_BANKS(ADDR_SURF_16_BANK));
2980 macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 NUM_BANKS(ADDR_SURF_16_BANK));
2984 macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2987 NUM_BANKS(ADDR_SURF_8_BANK));
2988 macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2991 NUM_BANKS(ADDR_SURF_16_BANK));
2992 macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2995 NUM_BANKS(ADDR_SURF_16_BANK));
2996 macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2999 NUM_BANKS(ADDR_SURF_16_BANK));
3000 macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3003 NUM_BANKS(ADDR_SURF_16_BANK));
3004 macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3007 NUM_BANKS(ADDR_SURF_16_BANK));
3008 macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3009 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3010 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3011 NUM_BANKS(ADDR_SURF_16_BANK));
3012 macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3013 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3014 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3015 NUM_BANKS(ADDR_SURF_8_BANK));
3016
3017 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3018 WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
3019 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3020 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), macrotile[reg_offset]);
3021 break;
3022
3023 default:
3024 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3025 }
3026}
3027
3028/**
3029 * cik_select_se_sh - select which SE, SH to address
3030 *
3031 * @rdev: radeon_device pointer
3032 * @se_num: shader engine to address
3033 * @sh_num: sh block to address
3034 *
3035 * Select which SE, SH combinations to address. Certain
3036 * registers are instanced per SE or SH. 0xffffffff means
3037 * broadcast to all SEs or SHs (CIK).
3038 */
3039static void cik_select_se_sh(struct radeon_device *rdev,
3040 u32 se_num, u32 sh_num)
3041{
3042 u32 data = INSTANCE_BROADCAST_WRITES;
3043
3044 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3045 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3046 else if (se_num == 0xffffffff)
3047 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3048 else if (sh_num == 0xffffffff)
3049 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3050 else
3051 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3052 WREG32(GRBM_GFX_INDEX, data);
3053}
3054
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Builds a mask with the low @bit_width bits set (CIK).
 * Widths of 32 or more yield a mask with all 32 bits set.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting a 32-bit value by 32 or more is undefined, so saturate */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3073
3074/**
3075 * cik_get_rb_disabled - computes the mask of disabled RBs
3076 *
3077 * @rdev: radeon_device pointer
3078 * @max_rb_num: max RBs (render backends) for the asic
3079 * @se_num: number of SEs (shader engines) for the asic
3080 * @sh_per_se: number of SH blocks per SE for the asic
3081 *
3082 * Calculates the bitmask of disabled RBs (CIK).
3083 * Returns the disabled RB bitmask.
3084 */
3085static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3086 u32 max_rb_num_per_se,
3087 u32 sh_per_se)
3088{
3089 u32 data, mask;
3090
3091 data = RREG32(CC_RB_BACKEND_DISABLE);
3092 if (data & 1)
3093 data &= BACKEND_DISABLE_MASK;
3094 else
3095 data = 0;
3096 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3097
3098 data >>= BACKEND_DISABLE_SHIFT;
3099
3100 mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3101
3102 return data & mask;
3103}
3104
3105/**
3106 * cik_setup_rb - setup the RBs on the asic
3107 *
3108 * @rdev: radeon_device pointer
3109 * @se_num: number of SEs (shader engines) for the asic
3110 * @sh_per_se: number of SH blocks per SE for the asic
3111 * @max_rb_num: max RBs (render backends) for the asic
3112 *
3113 * Configures per-SE/SH RB registers (CIK).
3114 */
3115static void cik_setup_rb(struct radeon_device *rdev,
3116 u32 se_num, u32 sh_per_se,
3117 u32 max_rb_num_per_se)
3118{
3119 int i, j;
3120 u32 data, mask;
3121 u32 disabled_rbs = 0;
3122 u32 enabled_rbs = 0;
3123
3124 for (i = 0; i < se_num; i++) {
3125 for (j = 0; j < sh_per_se; j++) {
3126 cik_select_se_sh(rdev, i, j);
3127 data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3128 if (rdev->family == CHIP_HAWAII)
3129 disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3130 else
3131 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3132 }
3133 }
3134 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3135
3136 mask = 1;
3137 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3138 if (!(disabled_rbs & mask))
3139 enabled_rbs |= mask;
3140 mask <<= 1;
3141 }
3142
3143 rdev->config.cik.backend_enable_mask = enabled_rbs;
3144
3145 for (i = 0; i < se_num; i++) {
3146 cik_select_se_sh(rdev, i, 0xffffffff);
3147 data = 0;
3148 for (j = 0; j < sh_per_se; j++) {
3149 switch (enabled_rbs & 3) {
3150 case 0:
3151 if (j == 0)
3152 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3153 else
3154 data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3155 break;
3156 case 1:
3157 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3158 break;
3159 case 2:
3160 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3161 break;
3162 case 3:
3163 default:
3164 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3165 break;
3166 }
3167 enabled_rbs >>= 2;
3168 }
3169 WREG32(PA_SC_RASTER_CONFIG, data);
3170 }
3171 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3172}
3173
3174/**
3175 * cik_gpu_init - setup the 3D engine
3176 *
3177 * @rdev: radeon_device pointer
3178 *
3179 * Configures the 3D engine and tiling configuration
3180 * registers so that the 3D engine is usable.
3181 */
3182static void cik_gpu_init(struct radeon_device *rdev)
3183{
3184 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3185 u32 mc_shared_chmap, mc_arb_ramcfg;
3186 u32 hdp_host_path_cntl;
3187 u32 tmp;
3188 int i, j;
3189
3190 switch (rdev->family) {
3191 case CHIP_BONAIRE:
3192 rdev->config.cik.max_shader_engines = 2;
3193 rdev->config.cik.max_tile_pipes = 4;
3194 rdev->config.cik.max_cu_per_sh = 7;
3195 rdev->config.cik.max_sh_per_se = 1;
3196 rdev->config.cik.max_backends_per_se = 2;
3197 rdev->config.cik.max_texture_channel_caches = 4;
3198 rdev->config.cik.max_gprs = 256;
3199 rdev->config.cik.max_gs_threads = 32;
3200 rdev->config.cik.max_hw_contexts = 8;
3201
3202 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3203 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3204 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3205 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3206 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3207 break;
3208 case CHIP_HAWAII:
3209 rdev->config.cik.max_shader_engines = 4;
3210 rdev->config.cik.max_tile_pipes = 16;
3211 rdev->config.cik.max_cu_per_sh = 11;
3212 rdev->config.cik.max_sh_per_se = 1;
3213 rdev->config.cik.max_backends_per_se = 4;
3214 rdev->config.cik.max_texture_channel_caches = 16;
3215 rdev->config.cik.max_gprs = 256;
3216 rdev->config.cik.max_gs_threads = 32;
3217 rdev->config.cik.max_hw_contexts = 8;
3218
3219 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3220 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3221 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3222 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3223 gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3224 break;
3225 case CHIP_KAVERI:
3226 rdev->config.cik.max_shader_engines = 1;
3227 rdev->config.cik.max_tile_pipes = 4;
3228 rdev->config.cik.max_cu_per_sh = 8;
3229 rdev->config.cik.max_backends_per_se = 2;
3230 rdev->config.cik.max_sh_per_se = 1;
3231 rdev->config.cik.max_texture_channel_caches = 4;
3232 rdev->config.cik.max_gprs = 256;
3233 rdev->config.cik.max_gs_threads = 16;
3234 rdev->config.cik.max_hw_contexts = 8;
3235
3236 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241 break;
3242 case CHIP_KABINI:
3243 case CHIP_MULLINS:
3244 default:
3245 rdev->config.cik.max_shader_engines = 1;
3246 rdev->config.cik.max_tile_pipes = 2;
3247 rdev->config.cik.max_cu_per_sh = 2;
3248 rdev->config.cik.max_sh_per_se = 1;
3249 rdev->config.cik.max_backends_per_se = 1;
3250 rdev->config.cik.max_texture_channel_caches = 2;
3251 rdev->config.cik.max_gprs = 256;
3252 rdev->config.cik.max_gs_threads = 16;
3253 rdev->config.cik.max_hw_contexts = 8;
3254
3255 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3256 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3257 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3258 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3259 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3260 break;
3261 }
3262
3263 /* Initialize HDP */
3264 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3265 WREG32((0x2c14 + j), 0x00000000);
3266 WREG32((0x2c18 + j), 0x00000000);
3267 WREG32((0x2c1c + j), 0x00000000);
3268 WREG32((0x2c20 + j), 0x00000000);
3269 WREG32((0x2c24 + j), 0x00000000);
3270 }
3271
3272 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3273 WREG32(SRBM_INT_CNTL, 0x1);
3274 WREG32(SRBM_INT_ACK, 0x1);
3275
3276 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3277
3278 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3279 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3280
3281 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3282 rdev->config.cik.mem_max_burst_length_bytes = 256;
3283 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3284 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3285 if (rdev->config.cik.mem_row_size_in_kb > 4)
3286 rdev->config.cik.mem_row_size_in_kb = 4;
3287 /* XXX use MC settings? */
3288 rdev->config.cik.shader_engine_tile_size = 32;
3289 rdev->config.cik.num_gpus = 1;
3290 rdev->config.cik.multi_gpu_tile_size = 64;
3291
3292 /* fix up row size */
3293 gb_addr_config &= ~ROW_SIZE_MASK;
3294 switch (rdev->config.cik.mem_row_size_in_kb) {
3295 case 1:
3296 default:
3297 gb_addr_config |= ROW_SIZE(0);
3298 break;
3299 case 2:
3300 gb_addr_config |= ROW_SIZE(1);
3301 break;
3302 case 4:
3303 gb_addr_config |= ROW_SIZE(2);
3304 break;
3305 }
3306
3307 /* setup tiling info dword. gb_addr_config is not adequate since it does
3308 * not have bank info, so create a custom tiling dword.
3309 * bits 3:0 num_pipes
3310 * bits 7:4 num_banks
3311 * bits 11:8 group_size
3312 * bits 15:12 row_size
3313 */
3314 rdev->config.cik.tile_config = 0;
3315 switch (rdev->config.cik.num_tile_pipes) {
3316 case 1:
3317 rdev->config.cik.tile_config |= (0 << 0);
3318 break;
3319 case 2:
3320 rdev->config.cik.tile_config |= (1 << 0);
3321 break;
3322 case 4:
3323 rdev->config.cik.tile_config |= (2 << 0);
3324 break;
3325 case 8:
3326 default:
3327 /* XXX what about 12? */
3328 rdev->config.cik.tile_config |= (3 << 0);
3329 break;
3330 }
3331 rdev->config.cik.tile_config |=
3332 ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3333 rdev->config.cik.tile_config |=
3334 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3335 rdev->config.cik.tile_config |=
3336 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3337
3338 WREG32(GB_ADDR_CONFIG, gb_addr_config);
3339 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3340 WREG32(DMIF_ADDR_CALC, gb_addr_config);
3341 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3342 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3343 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3344 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3345 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3346
3347 cik_tiling_mode_table_init(rdev);
3348
3349 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3350 rdev->config.cik.max_sh_per_se,
3351 rdev->config.cik.max_backends_per_se);
3352
3353 rdev->config.cik.active_cus = 0;
3354 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3355 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3356 rdev->config.cik.active_cus +=
3357 hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3358 }
3359 }
3360
3361 /* set HW defaults for 3D engine */
3362 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3363
3364 WREG32(SX_DEBUG_1, 0x20);
3365
3366 WREG32(TA_CNTL_AUX, 0x00010000);
3367
3368 tmp = RREG32(SPI_CONFIG_CNTL);
3369 tmp |= 0x03000000;
3370 WREG32(SPI_CONFIG_CNTL, tmp);
3371
3372 WREG32(SQ_CONFIG, 1);
3373
3374 WREG32(DB_DEBUG, 0);
3375
3376 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3377 tmp |= 0x00000400;
3378 WREG32(DB_DEBUG2, tmp);
3379
3380 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3381 tmp |= 0x00020200;
3382 WREG32(DB_DEBUG3, tmp);
3383
3384 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3385 tmp |= 0x00018208;
3386 WREG32(CB_HW_CONTROL, tmp);
3387
3388 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3389
3390 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3391 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3392 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3393 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3394
3395 WREG32(VGT_NUM_INSTANCES, 1);
3396
3397 WREG32(CP_PERFMON_CNTL, 0);
3398
3399 WREG32(SQ_CONFIG, 0);
3400
3401 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3402 FORCE_EOV_MAX_REZ_CNT(255)));
3403
3404 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3405 AUTO_INVLD_EN(ES_AND_GS_AUTO));
3406
3407 WREG32(VGT_GS_VERTEX_REUSE, 16);
3408 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3409
3410 tmp = RREG32(HDP_MISC_CNTL);
3411 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3412 WREG32(HDP_MISC_CNTL, tmp);
3413
3414 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3415 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3416
3417 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3418 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3419
3420 udelay(50);
3421}
3422
3423/*
3424 * GPU scratch registers helpers function.
3425 */
3426/**
3427 * cik_scratch_init - setup driver info for CP scratch regs
3428 *
3429 * @rdev: radeon_device pointer
3430 *
3431 * Set up the number and offset of the CP scratch registers.
3432 * NOTE: use of CP scratch registers is a legacy inferface and
3433 * is not used by default on newer asics (r6xx+). On newer asics,
3434 * memory buffers are used for fences rather than scratch regs.
3435 */
3436static void cik_scratch_init(struct radeon_device *rdev)
3437{
3438 int i;
3439
3440 rdev->scratch.num_reg = 7;
3441 rdev->scratch.reg_base = SCRATCH_REG0;
3442 for (i = 0; i < rdev->scratch.num_reg; i++) {
3443 rdev->scratch.free[i] = true;
3444 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3445 }
3446}
3447
3448/**
3449 * cik_ring_test - basic gfx ring test
3450 *
3451 * @rdev: radeon_device pointer
3452 * @ring: radeon_ring structure holding ring information
3453 *
3454 * Allocate a scratch register and write to it using the gfx ring (CIK).
3455 * Provides a basic gfx ring test to verify that the ring is working.
3456 * Used by cik_cp_gfx_resume();
3457 * Returns 0 on success, error on failure.
3458 */
3459int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3460{
3461 uint32_t scratch;
3462 uint32_t tmp = 0;
3463 unsigned i;
3464 int r;
3465
3466 r = radeon_scratch_get(rdev, &scratch);
3467 if (r) {
3468 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3469 return r;
3470 }
3471 WREG32(scratch, 0xCAFEDEAD);
3472 r = radeon_ring_lock(rdev, ring, 3);
3473 if (r) {
3474 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3475 radeon_scratch_free(rdev, scratch);
3476 return r;
3477 }
3478 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3479 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3480 radeon_ring_write(ring, 0xDEADBEEF);
3481 radeon_ring_unlock_commit(rdev, ring, false);
3482
3483 for (i = 0; i < rdev->usec_timeout; i++) {
3484 tmp = RREG32(scratch);
3485 if (tmp == 0xDEADBEEF)
3486 break;
3487 udelay(1);
3488 }
3489 if (i < rdev->usec_timeout) {
3490 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3491 } else {
3492 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3493 ring->idx, scratch, tmp);
3494 r = -EINVAL;
3495 }
3496 radeon_scratch_free(rdev, scratch);
3497 return r;
3498}
3499
3500/**
3501 * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3502 *
3503 * @rdev: radeon_device pointer
3504 * @ridx: radeon ring index
3505 *
3506 * Emits an hdp flush on the cp.
3507 */
3508static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3509 int ridx)
3510{
3511 struct radeon_ring *ring = &rdev->ring[ridx];
3512 u32 ref_and_mask;
3513
3514 switch (ring->idx) {
3515 case CAYMAN_RING_TYPE_CP1_INDEX:
3516 case CAYMAN_RING_TYPE_CP2_INDEX:
3517 default:
3518 switch (ring->me) {
3519 case 0:
3520 ref_and_mask = CP2 << ring->pipe;
3521 break;
3522 case 1:
3523 ref_and_mask = CP6 << ring->pipe;
3524 break;
3525 default:
3526 return;
3527 }
3528 break;
3529 case RADEON_RING_TYPE_GFX_INDEX:
3530 ref_and_mask = CP0;
3531 break;
3532 }
3533
3534 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3535 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3536 WAIT_REG_MEM_FUNCTION(3) | /* == */
3537 WAIT_REG_MEM_ENGINE(1))); /* pfp */
3538 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3539 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3540 radeon_ring_write(ring, ref_and_mask);
3541 radeon_ring_write(ring, ref_and_mask);
3542 radeon_ring_write(ring, 0x20); /* poll interval */
3543}
3544
3545/**
3546 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3547 *
3548 * @rdev: radeon_device pointer
3549 * @fence: radeon fence object
3550 *
3551 * Emits a fence sequnce number on the gfx ring and flushes
3552 * GPU caches.
3553 */
3554void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3555 struct radeon_fence *fence)
3556{
3557 struct radeon_ring *ring = &rdev->ring[fence->ring];
3558 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3559
3560 /* Workaround for cache flush problems. First send a dummy EOP
3561 * event down the pipe with seq one below.
3562 */
3563 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3564 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3565 EOP_TC_ACTION_EN |
3566 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3567 EVENT_INDEX(5)));
3568 radeon_ring_write(ring, addr & 0xfffffffc);
3569 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3570 DATA_SEL(1) | INT_SEL(0));
3571 radeon_ring_write(ring, fence->seq - 1);
3572 radeon_ring_write(ring, 0);
3573
3574 /* Then send the real EOP event down the pipe. */
3575 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3576 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3577 EOP_TC_ACTION_EN |
3578 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3579 EVENT_INDEX(5)));
3580 radeon_ring_write(ring, addr & 0xfffffffc);
3581 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3582 radeon_ring_write(ring, fence->seq);
3583 radeon_ring_write(ring, 0);
3584}
3585
3586/**
3587 * cik_fence_compute_ring_emit - emit a fence on the compute ring
3588 *
3589 * @rdev: radeon_device pointer
3590 * @fence: radeon fence object
3591 *
3592 * Emits a fence sequnce number on the compute ring and flushes
3593 * GPU caches.
3594 */
3595void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3596 struct radeon_fence *fence)
3597{
3598 struct radeon_ring *ring = &rdev->ring[fence->ring];
3599 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3600
3601 /* RELEASE_MEM - flush caches, send int */
3602 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3603 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3604 EOP_TC_ACTION_EN |
3605 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3606 EVENT_INDEX(5)));
3607 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3608 radeon_ring_write(ring, addr & 0xfffffffc);
3609 radeon_ring_write(ring, upper_32_bits(addr));
3610 radeon_ring_write(ring, fence->seq);
3611 radeon_ring_write(ring, 0);
3612}
3613
3614/**
3615 * cik_semaphore_ring_emit - emit a semaphore on the CP ring
3616 *
3617 * @rdev: radeon_device pointer
3618 * @ring: radeon ring buffer object
3619 * @semaphore: radeon semaphore object
3620 * @emit_wait: Is this a sempahore wait?
3621 *
3622 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3623 * from running ahead of semaphore waits.
3624 */
3625bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3626 struct radeon_ring *ring,
3627 struct radeon_semaphore *semaphore,
3628 bool emit_wait)
3629{
3630 uint64_t addr = semaphore->gpu_addr;
3631 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3632
3633 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3634 radeon_ring_write(ring, lower_32_bits(addr));
3635 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3636
3637 if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
3638 /* Prevent the PFP from running ahead of the semaphore wait */
3639 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3640 radeon_ring_write(ring, 0x0);
3641 }
3642
3643 return true;
3644}
3645
3646/**
3647 * cik_copy_cpdma - copy pages using the CP DMA engine
3648 *
3649 * @rdev: radeon_device pointer
3650 * @src_offset: src GPU address
3651 * @dst_offset: dst GPU address
3652 * @num_gpu_pages: number of GPU pages to xfer
3653 * @resv: reservation object to sync to
3654 *
3655 * Copy GPU paging using the CP DMA engine (CIK+).
3656 * Used by the radeon ttm implementation to move pages if
3657 * registered as the asic copy callback.
3658 */
3659struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
3660 uint64_t src_offset, uint64_t dst_offset,
3661 unsigned num_gpu_pages,
3662 struct dma_resv *resv)
3663{
3664 struct radeon_fence *fence;
3665 struct radeon_sync sync;
3666 int ring_index = rdev->asic->copy.blit_ring_index;
3667 struct radeon_ring *ring = &rdev->ring[ring_index];
3668 u32 size_in_bytes, cur_size_in_bytes, control;
3669 int i, num_loops;
3670 int r = 0;
3671
3672 radeon_sync_create(&sync);
3673
3674 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3675 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3676 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3677 if (r) {
3678 DRM_ERROR("radeon: moving bo (%d).\n", r);
3679 radeon_sync_free(rdev, &sync, NULL);
3680 return ERR_PTR(r);
3681 }
3682
3683 radeon_sync_resv(rdev, &sync, resv, false);
3684 radeon_sync_rings(rdev, &sync, ring->idx);
3685
3686 for (i = 0; i < num_loops; i++) {
3687 cur_size_in_bytes = size_in_bytes;
3688 if (cur_size_in_bytes > 0x1fffff)
3689 cur_size_in_bytes = 0x1fffff;
3690 size_in_bytes -= cur_size_in_bytes;
3691 control = 0;
3692 if (size_in_bytes == 0)
3693 control |= PACKET3_DMA_DATA_CP_SYNC;
3694 radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3695 radeon_ring_write(ring, control);
3696 radeon_ring_write(ring, lower_32_bits(src_offset));
3697 radeon_ring_write(ring, upper_32_bits(src_offset));
3698 radeon_ring_write(ring, lower_32_bits(dst_offset));
3699 radeon_ring_write(ring, upper_32_bits(dst_offset));
3700 radeon_ring_write(ring, cur_size_in_bytes);
3701 src_offset += cur_size_in_bytes;
3702 dst_offset += cur_size_in_bytes;
3703 }
3704
3705 r = radeon_fence_emit(rdev, &fence, ring->idx);
3706 if (r) {
3707 radeon_ring_unlock_undo(rdev, ring);
3708 radeon_sync_free(rdev, &sync, NULL);
3709 return ERR_PTR(r);
3710 }
3711
3712 radeon_ring_unlock_commit(rdev, ring, false);
3713 radeon_sync_free(rdev, &sync, fence);
3714
3715 return fence;
3716}
3717
3718/*
3719 * IB stuff
3720 */
3721/**
3722 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3723 *
3724 * @rdev: radeon_device pointer
3725 * @ib: radeon indirect buffer object
3726 *
3727 * Emits a DE (drawing engine) or CE (constant engine) IB
3728 * on the gfx ring. IBs are usually generated by userspace
3729 * acceleration drivers and submitted to the kernel for
3730 * scheduling on the ring. This function schedules the IB
3731 * on the gfx ring for execution by the GPU.
3732 */
3733void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3734{
3735 struct radeon_ring *ring = &rdev->ring[ib->ring];
3736 unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3737 u32 header, control = INDIRECT_BUFFER_VALID;
3738
3739 if (ib->is_const_ib) {
3740 /* set switch buffer packet before const IB */
3741 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3742 radeon_ring_write(ring, 0);
3743
3744 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3745 } else {
3746 u32 next_rptr;
3747 if (ring->rptr_save_reg) {
3748 next_rptr = ring->wptr + 3 + 4;
3749 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3750 radeon_ring_write(ring, ((ring->rptr_save_reg -
3751 PACKET3_SET_UCONFIG_REG_START) >> 2));
3752 radeon_ring_write(ring, next_rptr);
3753 } else if (rdev->wb.enabled) {
3754 next_rptr = ring->wptr + 5 + 4;
3755 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3756 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3757 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3758 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3759 radeon_ring_write(ring, next_rptr);
3760 }
3761
3762 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3763 }
3764
3765 control |= ib->length_dw | (vm_id << 24);
3766
3767 radeon_ring_write(ring, header);
3768 radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
3769 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3770 radeon_ring_write(ring, control);
3771}
3772
3773/**
3774 * cik_ib_test - basic gfx ring IB test
3775 *
3776 * @rdev: radeon_device pointer
3777 * @ring: radeon_ring structure holding ring information
3778 *
3779 * Allocate an IB and execute it on the gfx ring (CIK).
3780 * Provides a basic gfx ring test to verify that IBs are working.
3781 * Returns 0 on success, error on failure.
3782 */
3783int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3784{
3785 struct radeon_ib ib;
3786 uint32_t scratch;
3787 uint32_t tmp = 0;
3788 unsigned i;
3789 int r;
3790
3791 r = radeon_scratch_get(rdev, &scratch);
3792 if (r) {
3793 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3794 return r;
3795 }
3796 WREG32(scratch, 0xCAFEDEAD);
3797 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3798 if (r) {
3799 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3800 radeon_scratch_free(rdev, scratch);
3801 return r;
3802 }
3803 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3804 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3805 ib.ptr[2] = 0xDEADBEEF;
3806 ib.length_dw = 3;
3807 r = radeon_ib_schedule(rdev, &ib, NULL, false);
3808 if (r) {
3809 radeon_scratch_free(rdev, scratch);
3810 radeon_ib_free(rdev, &ib);
3811 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3812 return r;
3813 }
3814 r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies(
3815 RADEON_USEC_IB_TEST_TIMEOUT));
3816 if (r < 0) {
3817 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3818 radeon_scratch_free(rdev, scratch);
3819 radeon_ib_free(rdev, &ib);
3820 return r;
3821 } else if (r == 0) {
3822 DRM_ERROR("radeon: fence wait timed out.\n");
3823 radeon_scratch_free(rdev, scratch);
3824 radeon_ib_free(rdev, &ib);
3825 return -ETIMEDOUT;
3826 }
3827 r = 0;
3828 for (i = 0; i < rdev->usec_timeout; i++) {
3829 tmp = RREG32(scratch);
3830 if (tmp == 0xDEADBEEF)
3831 break;
3832 udelay(1);
3833 }
3834 if (i < rdev->usec_timeout) {
3835 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3836 } else {
3837 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3838 scratch, tmp);
3839 r = -EINVAL;
3840 }
3841 radeon_scratch_free(rdev, scratch);
3842 radeon_ib_free(rdev, &ib);
3843 return r;
3844}
3845
3846/*
3847 * CP.
3848 * On CIK, gfx and compute now have independant command processors.
3849 *
3850 * GFX
3851 * Gfx consists of a single ring and can process both gfx jobs and
3852 * compute jobs. The gfx CP consists of three microengines (ME):
3853 * PFP - Pre-Fetch Parser
3854 * ME - Micro Engine
3855 * CE - Constant Engine
3856 * The PFP and ME make up what is considered the Drawing Engine (DE).
3857 * The CE is an asynchronous engine used for updating buffer desciptors
3858 * used by the DE so that they can be loaded into cache in parallel
3859 * while the DE is processing state update packets.
3860 *
3861 * Compute
3862 * The compute CP consists of two microengines (ME):
3863 * MEC1 - Compute MicroEngine 1
3864 * MEC2 - Compute MicroEngine 2
3865 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3866 * The queues are exposed to userspace and are programmed directly
3867 * by the compute runtime.
3868 */
3869/**
3870 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3871 *
3872 * @rdev: radeon_device pointer
3873 * @enable: enable or disable the MEs
3874 *
3875 * Halts or unhalts the gfx MEs.
3876 */
3877static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3878{
3879 if (enable)
3880 WREG32(CP_ME_CNTL, 0);
3881 else {
3882 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3883 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3884 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3885 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3886 }
3887 udelay(50);
3888}
3889
3890/**
3891 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3892 *
3893 * @rdev: radeon_device pointer
3894 *
3895 * Loads the gfx PFP, ME, and CE ucode.
3896 * Returns 0 for success, -EINVAL if the ucode is not available.
3897 */
3898static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3899{
3900 int i;
3901
3902 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3903 return -EINVAL;
3904
3905 cik_cp_gfx_enable(rdev, false);
3906
3907 if (rdev->new_fw) {
3908 const struct gfx_firmware_header_v1_0 *pfp_hdr =
3909 (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3910 const struct gfx_firmware_header_v1_0 *ce_hdr =
3911 (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3912 const struct gfx_firmware_header_v1_0 *me_hdr =
3913 (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3914 const __le32 *fw_data;
3915 u32 fw_size;
3916
3917 radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3918 radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3919 radeon_ucode_print_gfx_hdr(&me_hdr->header);
3920
3921 /* PFP */
3922 fw_data = (const __le32 *)
3923 (rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3924 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3925 WREG32(CP_PFP_UCODE_ADDR, 0);
3926 for (i = 0; i < fw_size; i++)
3927 WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3928 WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
3929
3930 /* CE */
3931 fw_data = (const __le32 *)
3932 (rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3933 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3934 WREG32(CP_CE_UCODE_ADDR, 0);
3935 for (i = 0; i < fw_size; i++)
3936 WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3937 WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
3938
3939 /* ME */
3940 fw_data = (const __be32 *)
3941 (rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3942 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3943 WREG32(CP_ME_RAM_WADDR, 0);
3944 for (i = 0; i < fw_size; i++)
3945 WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3946 WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
3947 WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
3948 } else {
3949 const __be32 *fw_data;
3950
3951 /* PFP */
3952 fw_data = (const __be32 *)rdev->pfp_fw->data;
3953 WREG32(CP_PFP_UCODE_ADDR, 0);
3954 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3955 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3956 WREG32(CP_PFP_UCODE_ADDR, 0);
3957
3958 /* CE */
3959 fw_data = (const __be32 *)rdev->ce_fw->data;
3960 WREG32(CP_CE_UCODE_ADDR, 0);
3961 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3962 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3963 WREG32(CP_CE_UCODE_ADDR, 0);
3964
3965 /* ME */
3966 fw_data = (const __be32 *)rdev->me_fw->data;
3967 WREG32(CP_ME_RAM_WADDR, 0);
3968 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3969 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3970 WREG32(CP_ME_RAM_WADDR, 0);
3971 }
3972
3973 return 0;
3974}
3975
3976/**
3977 * cik_cp_gfx_start - start the gfx ring
3978 *
3979 * @rdev: radeon_device pointer
3980 *
3981 * Enables the ring and loads the clear state context and other
3982 * packets required to init the ring.
3983 * Returns 0 for success, error for failure.
3984 */
3985static int cik_cp_gfx_start(struct radeon_device *rdev)
3986{
3987 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3988 int r, i;
3989
3990 /* init the CP */
3991 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3992 WREG32(CP_ENDIAN_SWAP, 0);
3993 WREG32(CP_DEVICE_ID, 1);
3994
3995 cik_cp_gfx_enable(rdev, true);
3996
3997 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3998 if (r) {
3999 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4000 return r;
4001 }
4002
4003 /* init the CE partitions. CE only used for gfx on CIK */
4004 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4005 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4006 radeon_ring_write(ring, 0x8000);
4007 radeon_ring_write(ring, 0x8000);
4008
4009 /* setup clear context state */
4010 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4011 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4012
4013 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4014 radeon_ring_write(ring, 0x80000000);
4015 radeon_ring_write(ring, 0x80000000);
4016
4017 for (i = 0; i < cik_default_size; i++)
4018 radeon_ring_write(ring, cik_default_state[i]);
4019
4020 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4021 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4022
4023 /* set clear context state */
4024 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4025 radeon_ring_write(ring, 0);
4026
4027 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4028 radeon_ring_write(ring, 0x00000316);
4029 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4030 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4031
4032 radeon_ring_unlock_commit(rdev, ring, false);
4033
4034 return 0;
4035}
4036
4037/**
4038 * cik_cp_gfx_fini - stop the gfx ring
4039 *
4040 * @rdev: radeon_device pointer
4041 *
4042 * Stop the gfx ring and tear down the driver ring
4043 * info.
4044 */
4045static void cik_cp_gfx_fini(struct radeon_device *rdev)
4046{
4047 cik_cp_gfx_enable(rdev, false);
4048 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4049}
4050
4051/**
4052 * cik_cp_gfx_resume - setup the gfx ring buffer registers
4053 *
4054 * @rdev: radeon_device pointer
4055 *
4056 * Program the location and size of the gfx ring buffer
4057 * and test it to make sure it's working.
4058 * Returns 0 for success, error for failure.
4059 */
4060static int cik_cp_gfx_resume(struct radeon_device *rdev)
4061{
4062 struct radeon_ring *ring;
4063 u32 tmp;
4064 u32 rb_bufsz;
4065 u64 rb_addr;
4066 int r;
4067
4068 WREG32(CP_SEM_WAIT_TIMER, 0x0);
4069 if (rdev->family != CHIP_HAWAII)
4070 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4071
4072 /* Set the write pointer delay */
4073 WREG32(CP_RB_WPTR_DELAY, 0);
4074
4075 /* set the RB to use vmid 0 */
4076 WREG32(CP_RB_VMID, 0);
4077
4078 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4079
4080 /* ring 0 - compute and gfx */
4081 /* Set ring buffer size */
4082 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4083 rb_bufsz = order_base_2(ring->ring_size / 8);
4084 tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4085#ifdef __BIG_ENDIAN
4086 tmp |= BUF_SWAP_32BIT;
4087#endif
4088 WREG32(CP_RB0_CNTL, tmp);
4089
4090 /* Initialize the ring buffer's read and write pointers */
4091 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4092 ring->wptr = 0;
4093 WREG32(CP_RB0_WPTR, ring->wptr);
4094
4095 /* set the wb address wether it's enabled or not */
4096 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4097 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4098
4099 /* scratch register shadowing is no longer supported */
4100 WREG32(SCRATCH_UMSK, 0);
4101
4102 if (!rdev->wb.enabled)
4103 tmp |= RB_NO_UPDATE;
4104
4105 mdelay(1);
4106 WREG32(CP_RB0_CNTL, tmp);
4107
4108 rb_addr = ring->gpu_addr >> 8;
4109 WREG32(CP_RB0_BASE, rb_addr);
4110 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4111
4112 /* start the ring */
4113 cik_cp_gfx_start(rdev);
4114 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4115 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4116 if (r) {
4117 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4118 return r;
4119 }
4120
4121 if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4122 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4123
4124 return 0;
4125}
4126
4127u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4128 struct radeon_ring *ring)
4129{
4130 u32 rptr;
4131
4132 if (rdev->wb.enabled)
4133 rptr = rdev->wb.wb[ring->rptr_offs/4];
4134 else
4135 rptr = RREG32(CP_RB0_RPTR);
4136
4137 return rptr;
4138}
4139
4140u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4141 struct radeon_ring *ring)
4142{
4143 return RREG32(CP_RB0_WPTR);
4144}
4145
4146void cik_gfx_set_wptr(struct radeon_device *rdev,
4147 struct radeon_ring *ring)
4148{
4149 WREG32(CP_RB0_WPTR, ring->wptr);
4150 (void)RREG32(CP_RB0_WPTR);
4151}
4152
4153u32 cik_compute_get_rptr(struct radeon_device *rdev,
4154 struct radeon_ring *ring)
4155{
4156 u32 rptr;
4157
4158 if (rdev->wb.enabled) {
4159 rptr = rdev->wb.wb[ring->rptr_offs/4];
4160 } else {
4161 mutex_lock(&rdev->srbm_mutex);
4162 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4163 rptr = RREG32(CP_HQD_PQ_RPTR);
4164 cik_srbm_select(rdev, 0, 0, 0, 0);
4165 mutex_unlock(&rdev->srbm_mutex);
4166 }
4167
4168 return rptr;
4169}
4170
4171u32 cik_compute_get_wptr(struct radeon_device *rdev,
4172 struct radeon_ring *ring)
4173{
4174 u32 wptr;
4175
4176 if (rdev->wb.enabled) {
4177 /* XXX check if swapping is necessary on BE */
4178 wptr = rdev->wb.wb[ring->wptr_offs/4];
4179 } else {
4180 mutex_lock(&rdev->srbm_mutex);
4181 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4182 wptr = RREG32(CP_HQD_PQ_WPTR);
4183 cik_srbm_select(rdev, 0, 0, 0, 0);
4184 mutex_unlock(&rdev->srbm_mutex);
4185 }
4186
4187 return wptr;
4188}
4189
4190void cik_compute_set_wptr(struct radeon_device *rdev,
4191 struct radeon_ring *ring)
4192{
4193 /* XXX check if swapping is necessary on BE */
4194 rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4195 WDOORBELL32(ring->doorbell_index, ring->wptr);
4196}
4197
4198static void cik_compute_stop(struct radeon_device *rdev,
4199 struct radeon_ring *ring)
4200{
4201 u32 j, tmp;
4202
4203 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4204 /* Disable wptr polling. */
4205 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4206 tmp &= ~WPTR_POLL_EN;
4207 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4208 /* Disable HQD. */
4209 if (RREG32(CP_HQD_ACTIVE) & 1) {
4210 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4211 for (j = 0; j < rdev->usec_timeout; j++) {
4212 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4213 break;
4214 udelay(1);
4215 }
4216 WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4217 WREG32(CP_HQD_PQ_RPTR, 0);
4218 WREG32(CP_HQD_PQ_WPTR, 0);
4219 }
4220 cik_srbm_select(rdev, 0, 0, 0, 0);
4221}
4222
4223/**
4224 * cik_cp_compute_enable - enable/disable the compute CP MEs
4225 *
4226 * @rdev: radeon_device pointer
4227 * @enable: enable or disable the MEs
4228 *
4229 * Halts or unhalts the compute MEs.
4230 */
4231static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4232{
4233 if (enable)
4234 WREG32(CP_MEC_CNTL, 0);
4235 else {
4236 /*
4237 * To make hibernation reliable we need to clear compute ring
4238 * configuration before halting the compute ring.
4239 */
4240 mutex_lock(&rdev->srbm_mutex);
4241 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4242 cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4243 mutex_unlock(&rdev->srbm_mutex);
4244
4245 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4246 rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4247 rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4248 }
4249 udelay(50);
4250}
4251
4252/**
4253 * cik_cp_compute_load_microcode - load the compute CP ME ucode
4254 *
4255 * @rdev: radeon_device pointer
4256 *
4257 * Loads the compute MEC1&2 ucode.
4258 * Returns 0 for success, -EINVAL if the ucode is not available.
4259 */
4260static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4261{
4262 int i;
4263
4264 if (!rdev->mec_fw)
4265 return -EINVAL;
4266
4267 cik_cp_compute_enable(rdev, false);
4268
4269 if (rdev->new_fw) {
4270 const struct gfx_firmware_header_v1_0 *mec_hdr =
4271 (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4272 const __le32 *fw_data;
4273 u32 fw_size;
4274
4275 radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4276
4277 /* MEC1 */
4278 fw_data = (const __le32 *)
4279 (rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4280 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4281 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4282 for (i = 0; i < fw_size; i++)
4283 WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4284 WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4285
4286 /* MEC2 */
4287 if (rdev->family == CHIP_KAVERI) {
4288 const struct gfx_firmware_header_v1_0 *mec2_hdr =
4289 (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4290
4291 fw_data = (const __le32 *)
4292 (rdev->mec2_fw->data +
4293 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4294 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4295 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4296 for (i = 0; i < fw_size; i++)
4297 WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4298 WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4299 }
4300 } else {
4301 const __be32 *fw_data;
4302
4303 /* MEC1 */
4304 fw_data = (const __be32 *)rdev->mec_fw->data;
4305 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4306 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4307 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4308 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4309
4310 if (rdev->family == CHIP_KAVERI) {
4311 /* MEC2 */
4312 fw_data = (const __be32 *)rdev->mec_fw->data;
4313 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4314 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4315 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4316 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4317 }
4318 }
4319
4320 return 0;
4321}
4322
4323/**
4324 * cik_cp_compute_start - start the compute queues
4325 *
4326 * @rdev: radeon_device pointer
4327 *
4328 * Enable the compute queues.
4329 * Returns 0 for success, error for failure.
4330 */
4331static int cik_cp_compute_start(struct radeon_device *rdev)
4332{
4333 cik_cp_compute_enable(rdev, true);
4334
4335 return 0;
4336}
4337
4338/**
4339 * cik_cp_compute_fini - stop the compute queues
4340 *
4341 * @rdev: radeon_device pointer
4342 *
4343 * Stop the compute queues and tear down the driver queue
4344 * info.
4345 */
4346static void cik_cp_compute_fini(struct radeon_device *rdev)
4347{
4348 int i, idx, r;
4349
4350 cik_cp_compute_enable(rdev, false);
4351
4352 for (i = 0; i < 2; i++) {
4353 if (i == 0)
4354 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4355 else
4356 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4357
4358 if (rdev->ring[idx].mqd_obj) {
4359 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4360 if (unlikely(r != 0))
4361 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4362
4363 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4364 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4365
4366 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4367 rdev->ring[idx].mqd_obj = NULL;
4368 }
4369 }
4370}
4371
4372static void cik_mec_fini(struct radeon_device *rdev)
4373{
4374 int r;
4375
4376 if (rdev->mec.hpd_eop_obj) {
4377 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4378 if (unlikely(r != 0))
4379 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4380 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4381 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4382
4383 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4384 rdev->mec.hpd_eop_obj = NULL;
4385 }
4386}
4387
4388#define MEC_HPD_SIZE 2048
4389
4390static int cik_mec_init(struct radeon_device *rdev)
4391{
4392 int r;
4393 u32 *hpd;
4394
4395 /*
4396 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4397 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4398 */
4399 if (rdev->family == CHIP_KAVERI)
4400 rdev->mec.num_mec = 2;
4401 else
4402 rdev->mec.num_mec = 1;
4403 rdev->mec.num_pipe = 4;
4404 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4405
4406 if (rdev->mec.hpd_eop_obj == NULL) {
4407 r = radeon_bo_create(rdev,
4408 rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4409 PAGE_SIZE, true,
4410 RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4411 &rdev->mec.hpd_eop_obj);
4412 if (r) {
4413 dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4414 return r;
4415 }
4416 }
4417
4418 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4419 if (unlikely(r != 0)) {
4420 cik_mec_fini(rdev);
4421 return r;
4422 }
4423 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4424 &rdev->mec.hpd_eop_gpu_addr);
4425 if (r) {
4426 dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4427 cik_mec_fini(rdev);
4428 return r;
4429 }
4430 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4431 if (r) {
4432 dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4433 cik_mec_fini(rdev);
4434 return r;
4435 }
4436
4437 /* clear memory. Not sure if this is required or not */
4438 memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4439
4440 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4441 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4442
4443 return 0;
4444}
4445
4446struct hqd_registers
4447{
4448 u32 cp_mqd_base_addr;
4449 u32 cp_mqd_base_addr_hi;
4450 u32 cp_hqd_active;
4451 u32 cp_hqd_vmid;
4452 u32 cp_hqd_persistent_state;
4453 u32 cp_hqd_pipe_priority;
4454 u32 cp_hqd_queue_priority;
4455 u32 cp_hqd_quantum;
4456 u32 cp_hqd_pq_base;
4457 u32 cp_hqd_pq_base_hi;
4458 u32 cp_hqd_pq_rptr;
4459 u32 cp_hqd_pq_rptr_report_addr;
4460 u32 cp_hqd_pq_rptr_report_addr_hi;
4461 u32 cp_hqd_pq_wptr_poll_addr;
4462 u32 cp_hqd_pq_wptr_poll_addr_hi;
4463 u32 cp_hqd_pq_doorbell_control;
4464 u32 cp_hqd_pq_wptr;
4465 u32 cp_hqd_pq_control;
4466 u32 cp_hqd_ib_base_addr;
4467 u32 cp_hqd_ib_base_addr_hi;
4468 u32 cp_hqd_ib_rptr;
4469 u32 cp_hqd_ib_control;
4470 u32 cp_hqd_iq_timer;
4471 u32 cp_hqd_iq_rptr;
4472 u32 cp_hqd_dequeue_request;
4473 u32 cp_hqd_dma_offload;
4474 u32 cp_hqd_sema_cmd;
4475 u32 cp_hqd_msg_type;
4476 u32 cp_hqd_atomic0_preop_lo;
4477 u32 cp_hqd_atomic0_preop_hi;
4478 u32 cp_hqd_atomic1_preop_lo;
4479 u32 cp_hqd_atomic1_preop_hi;
4480 u32 cp_hqd_hq_scheduler0;
4481 u32 cp_hqd_hq_scheduler1;
4482 u32 cp_mqd_control;
4483};
4484
4485struct bonaire_mqd
4486{
4487 u32 header;
4488 u32 dispatch_initiator;
4489 u32 dimensions[3];
4490 u32 start_idx[3];
4491 u32 num_threads[3];
4492 u32 pipeline_stat_enable;
4493 u32 perf_counter_enable;
4494 u32 pgm[2];
4495 u32 tba[2];
4496 u32 tma[2];
4497 u32 pgm_rsrc[2];
4498 u32 vmid;
4499 u32 resource_limits;
4500 u32 static_thread_mgmt01[2];
4501 u32 tmp_ring_size;
4502 u32 static_thread_mgmt23[2];
4503 u32 restart[3];
4504 u32 thread_trace_enable;
4505 u32 reserved1;
4506 u32 user_data[16];
4507 u32 vgtcs_invoke_count[2];
4508 struct hqd_registers queue_state;
4509 u32 dequeue_cntr;
4510 u32 interrupt_queue[64];
4511};
4512
4513/**
4514 * cik_cp_compute_resume - setup the compute queue registers
4515 *
4516 * @rdev: radeon_device pointer
4517 *
4518 * Program the compute queues and test them to make sure they
4519 * are working.
4520 * Returns 0 for success, error for failure.
4521 */
4522static int cik_cp_compute_resume(struct radeon_device *rdev)
4523{
4524 int r, i, j, idx;
4525 u32 tmp;
4526 bool use_doorbell = true;
4527 u64 hqd_gpu_addr;
4528 u64 mqd_gpu_addr;
4529 u64 eop_gpu_addr;
4530 u64 wb_gpu_addr;
4531 u32 *buf;
4532 struct bonaire_mqd *mqd;
4533
4534 r = cik_cp_compute_start(rdev);
4535 if (r)
4536 return r;
4537
4538 /* fix up chicken bits */
4539 tmp = RREG32(CP_CPF_DEBUG);
4540 tmp |= (1 << 23);
4541 WREG32(CP_CPF_DEBUG, tmp);
4542
4543 /* init the pipes */
4544 mutex_lock(&rdev->srbm_mutex);
4545
4546 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); ++i) {
4547 int me = (i < 4) ? 1 : 2;
4548 int pipe = (i < 4) ? i : (i - 4);
4549
4550 cik_srbm_select(rdev, me, pipe, 0, 0);
4551
4552 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
4553 /* write the EOP addr */
4554 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4555 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4556
4557 /* set the VMID assigned */
4558 WREG32(CP_HPD_EOP_VMID, 0);
4559
4560 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4561 tmp = RREG32(CP_HPD_EOP_CONTROL);
4562 tmp &= ~EOP_SIZE_MASK;
4563 tmp |= order_base_2(MEC_HPD_SIZE / 8);
4564 WREG32(CP_HPD_EOP_CONTROL, tmp);
4565
4566 }
4567 cik_srbm_select(rdev, 0, 0, 0, 0);
4568 mutex_unlock(&rdev->srbm_mutex);
4569
4570 /* init the queues. Just two for now. */
4571 for (i = 0; i < 2; i++) {
4572 if (i == 0)
4573 idx = CAYMAN_RING_TYPE_CP1_INDEX;
4574 else
4575 idx = CAYMAN_RING_TYPE_CP2_INDEX;
4576
4577 if (rdev->ring[idx].mqd_obj == NULL) {
4578 r = radeon_bo_create(rdev,
4579 sizeof(struct bonaire_mqd),
4580 PAGE_SIZE, true,
4581 RADEON_GEM_DOMAIN_GTT, 0, NULL,
4582 NULL, &rdev->ring[idx].mqd_obj);
4583 if (r) {
4584 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4585 return r;
4586 }
4587 }
4588
4589 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4590 if (unlikely(r != 0)) {
4591 cik_cp_compute_fini(rdev);
4592 return r;
4593 }
4594 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4595 &mqd_gpu_addr);
4596 if (r) {
4597 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4598 cik_cp_compute_fini(rdev);
4599 return r;
4600 }
4601 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4602 if (r) {
4603 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4604 cik_cp_compute_fini(rdev);
4605 return r;
4606 }
4607
4608 /* init the mqd struct */
4609 memset(buf, 0, sizeof(struct bonaire_mqd));
4610
4611 mqd = (struct bonaire_mqd *)buf;
4612 mqd->header = 0xC0310800;
4613 mqd->static_thread_mgmt01[0] = 0xffffffff;
4614 mqd->static_thread_mgmt01[1] = 0xffffffff;
4615 mqd->static_thread_mgmt23[0] = 0xffffffff;
4616 mqd->static_thread_mgmt23[1] = 0xffffffff;
4617
4618 mutex_lock(&rdev->srbm_mutex);
4619 cik_srbm_select(rdev, rdev->ring[idx].me,
4620 rdev->ring[idx].pipe,
4621 rdev->ring[idx].queue, 0);
4622
4623 /* disable wptr polling */
4624 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4625 tmp &= ~WPTR_POLL_EN;
4626 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4627
4628 /* enable doorbell? */
4629 mqd->queue_state.cp_hqd_pq_doorbell_control =
4630 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4631 if (use_doorbell)
4632 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4633 else
4634 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4635 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4636 mqd->queue_state.cp_hqd_pq_doorbell_control);
4637
4638 /* disable the queue if it's active */
4639 mqd->queue_state.cp_hqd_dequeue_request = 0;
4640 mqd->queue_state.cp_hqd_pq_rptr = 0;
4641 mqd->queue_state.cp_hqd_pq_wptr= 0;
4642 if (RREG32(CP_HQD_ACTIVE) & 1) {
4643 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4644 for (j = 0; j < rdev->usec_timeout; j++) {
4645 if (!(RREG32(CP_HQD_ACTIVE) & 1))
4646 break;
4647 udelay(1);
4648 }
4649 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4650 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4651 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4652 }
4653
4654 /* set the pointer to the MQD */
4655 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4656 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4657 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4658 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4659 /* set MQD vmid to 0 */
4660 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4661 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4662 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4663
4664 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4665 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4666 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4667 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4668 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4669 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4670
4671 /* set up the HQD, this is similar to CP_RB0_CNTL */
4672 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4673 mqd->queue_state.cp_hqd_pq_control &=
4674 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4675
4676 mqd->queue_state.cp_hqd_pq_control |=
4677 order_base_2(rdev->ring[idx].ring_size / 8);
4678 mqd->queue_state.cp_hqd_pq_control |=
4679 (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4680#ifdef __BIG_ENDIAN
4681 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4682#endif
4683 mqd->queue_state.cp_hqd_pq_control &=
4684 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4685 mqd->queue_state.cp_hqd_pq_control |=
4686 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4687 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4688
4689 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4690 if (i == 0)
4691 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4692 else
4693 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4694 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4695 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4696 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4697 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4698 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4699
4700 /* set the wb address wether it's enabled or not */
4701 if (i == 0)
4702 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4703 else
4704 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4705 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4706 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4707 upper_32_bits(wb_gpu_addr) & 0xffff;
4708 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4709 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4710 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4711 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4712
4713 /* enable the doorbell if requested */
4714 if (use_doorbell) {
4715 mqd->queue_state.cp_hqd_pq_doorbell_control =
4716 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4717 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4718 mqd->queue_state.cp_hqd_pq_doorbell_control |=
4719 DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4720 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4721 mqd->queue_state.cp_hqd_pq_doorbell_control &=
4722 ~(DOORBELL_SOURCE | DOORBELL_HIT);
4723
4724 } else {
4725 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4726 }
4727 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4728 mqd->queue_state.cp_hqd_pq_doorbell_control);
4729
4730 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4731 rdev->ring[idx].wptr = 0;
4732 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4733 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4734 mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4735
4736 /* set the vmid for the queue */
4737 mqd->queue_state.cp_hqd_vmid = 0;
4738 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4739
4740 /* activate the queue */
4741 mqd->queue_state.cp_hqd_active = 1;
4742 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4743
4744 cik_srbm_select(rdev, 0, 0, 0, 0);
4745 mutex_unlock(&rdev->srbm_mutex);
4746
4747 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4748 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4749
4750 rdev->ring[idx].ready = true;
4751 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4752 if (r)
4753 rdev->ring[idx].ready = false;
4754 }
4755
4756 return 0;
4757}
4758
4759static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4760{
4761 cik_cp_gfx_enable(rdev, enable);
4762 cik_cp_compute_enable(rdev, enable);
4763}
4764
/*
 * Load the gfx CP ucode and then the compute CP ucode.
 * Returns 0 for success or the first error reported by either loader;
 * the compute ucode is not attempted if the gfx load fails.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret = cik_cp_gfx_load_microcode(rdev);

	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4778
/* Tear down both halves of the CP: the gfx ring first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* halt the gfx CP and free the gfx ring */
	cik_cp_gfx_fini(rdev);
	/* halt the MECs and release the MQD buffer objects */
	cik_cp_compute_fini(rdev);
}
4784
4785static int cik_cp_resume(struct radeon_device *rdev)
4786{
4787 int r;
4788
4789 cik_enable_gui_idle_interrupt(rdev, false);
4790
4791 r = cik_cp_load_microcode(rdev);
4792 if (r)
4793 return r;
4794
4795 r = cik_cp_gfx_resume(rdev);
4796 if (r)
4797 return r;
4798 r = cik_cp_compute_resume(rdev);
4799 if (r)
4800 return r;
4801
4802 cik_enable_gui_idle_interrupt(rdev, true);
4803
4804 return 0;
4805}
4806
4807static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4808{
4809 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4810 RREG32(GRBM_STATUS));
4811 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4812 RREG32(GRBM_STATUS2));
4813 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4814 RREG32(GRBM_STATUS_SE0));
4815 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4816 RREG32(GRBM_STATUS_SE1));
4817 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4818 RREG32(GRBM_STATUS_SE2));
4819 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4820 RREG32(GRBM_STATUS_SE3));
4821 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4822 RREG32(SRBM_STATUS));
4823 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4824 RREG32(SRBM_STATUS2));
4825 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4826 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4827 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4828 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4829 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4830 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4831 RREG32(CP_STALLED_STAT1));
4832 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4833 RREG32(CP_STALLED_STAT2));
4834 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4835 RREG32(CP_STALLED_STAT3));
4836 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4837 RREG32(CP_CPF_BUSY_STAT));
4838 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4839 RREG32(CP_CPF_STALLED_STAT1));
4840 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4841 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4842 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4843 RREG32(CP_CPC_STALLED_STAT1));
4844 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4845}
4846
4847/**
4848 * cik_gpu_check_soft_reset - check which blocks are busy
4849 *
4850 * @rdev: radeon_device pointer
4851 *
4852 * Check which blocks are busy and return the relevant reset
4853 * mask to be used by cik_gpu_soft_reset().
4854 * Returns a mask of the blocks to be reset.
4855 */
4856u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4857{
4858 u32 reset_mask = 0;
4859 u32 tmp;
4860
4861 /* GRBM_STATUS */
4862 tmp = RREG32(GRBM_STATUS);
4863 if (tmp & (PA_BUSY | SC_BUSY |
4864 BCI_BUSY | SX_BUSY |
4865 TA_BUSY | VGT_BUSY |
4866 DB_BUSY | CB_BUSY |
4867 GDS_BUSY | SPI_BUSY |
4868 IA_BUSY | IA_BUSY_NO_DMA))
4869 reset_mask |= RADEON_RESET_GFX;
4870
4871 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4872 reset_mask |= RADEON_RESET_CP;
4873
4874 /* GRBM_STATUS2 */
4875 tmp = RREG32(GRBM_STATUS2);
4876 if (tmp & RLC_BUSY)
4877 reset_mask |= RADEON_RESET_RLC;
4878
4879 /* SDMA0_STATUS_REG */
4880 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4881 if (!(tmp & SDMA_IDLE))
4882 reset_mask |= RADEON_RESET_DMA;
4883
4884 /* SDMA1_STATUS_REG */
4885 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4886 if (!(tmp & SDMA_IDLE))
4887 reset_mask |= RADEON_RESET_DMA1;
4888
4889 /* SRBM_STATUS2 */
4890 tmp = RREG32(SRBM_STATUS2);
4891 if (tmp & SDMA_BUSY)
4892 reset_mask |= RADEON_RESET_DMA;
4893
4894 if (tmp & SDMA1_BUSY)
4895 reset_mask |= RADEON_RESET_DMA1;
4896
4897 /* SRBM_STATUS */
4898 tmp = RREG32(SRBM_STATUS);
4899
4900 if (tmp & IH_BUSY)
4901 reset_mask |= RADEON_RESET_IH;
4902
4903 if (tmp & SEM_BUSY)
4904 reset_mask |= RADEON_RESET_SEM;
4905
4906 if (tmp & GRBM_RQ_PENDING)
4907 reset_mask |= RADEON_RESET_GRBM;
4908
4909 if (tmp & VMC_BUSY)
4910 reset_mask |= RADEON_RESET_VMC;
4911
4912 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4913 MCC_BUSY | MCD_BUSY))
4914 reset_mask |= RADEON_RESET_MC;
4915
4916 if (evergreen_is_display_hung(rdev))
4917 reset_mask |= RADEON_RESET_DISPLAY;
4918
4919 /* Skip MC reset as it's mostly likely not hung, just busy */
4920 if (reset_mask & RADEON_RESET_MC) {
4921 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4922 reset_mask &= ~RADEON_RESET_MC;
4923 }
4924
4925 return reset_mask;
4926}
4927
4928/**
4929 * cik_gpu_soft_reset - soft reset GPU
4930 *
4931 * @rdev: radeon_device pointer
4932 * @reset_mask: mask of which blocks to reset
4933 *
4934 * Soft reset the blocks specified in @reset_mask.
4935 */
4936static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4937{
4938 struct evergreen_mc_save save;
4939 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4940 u32 tmp;
4941
4942 if (reset_mask == 0)
4943 return;
4944
4945 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4946
4947 cik_print_gpu_status_regs(rdev);
4948 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4949 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4950 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4951 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4952
4953 /* disable CG/PG */
4954 cik_fini_pg(rdev);
4955 cik_fini_cg(rdev);
4956
4957 /* stop the rlc */
4958 cik_rlc_stop(rdev);
4959
4960 /* Disable GFX parsing/prefetching */
4961 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4962
4963 /* Disable MEC parsing/prefetching */
4964 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4965
4966 if (reset_mask & RADEON_RESET_DMA) {
4967 /* sdma0 */
4968 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4969 tmp |= SDMA_HALT;
4970 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4971 }
4972 if (reset_mask & RADEON_RESET_DMA1) {
4973 /* sdma1 */
4974 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4975 tmp |= SDMA_HALT;
4976 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4977 }
4978
4979 evergreen_mc_stop(rdev, &save);
4980 if (evergreen_mc_wait_for_idle(rdev)) {
4981 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4982 }
4983
4984 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4985 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4986
4987 if (reset_mask & RADEON_RESET_CP) {
4988 grbm_soft_reset |= SOFT_RESET_CP;
4989
4990 srbm_soft_reset |= SOFT_RESET_GRBM;
4991 }
4992
4993 if (reset_mask & RADEON_RESET_DMA)
4994 srbm_soft_reset |= SOFT_RESET_SDMA;
4995
4996 if (reset_mask & RADEON_RESET_DMA1)
4997 srbm_soft_reset |= SOFT_RESET_SDMA1;
4998
4999 if (reset_mask & RADEON_RESET_DISPLAY)
5000 srbm_soft_reset |= SOFT_RESET_DC;
5001
5002 if (reset_mask & RADEON_RESET_RLC)
5003 grbm_soft_reset |= SOFT_RESET_RLC;
5004
5005 if (reset_mask & RADEON_RESET_SEM)
5006 srbm_soft_reset |= SOFT_RESET_SEM;
5007
5008 if (reset_mask & RADEON_RESET_IH)
5009 srbm_soft_reset |= SOFT_RESET_IH;
5010
5011 if (reset_mask & RADEON_RESET_GRBM)
5012 srbm_soft_reset |= SOFT_RESET_GRBM;
5013
5014 if (reset_mask & RADEON_RESET_VMC)
5015 srbm_soft_reset |= SOFT_RESET_VMC;
5016
5017 if (!(rdev->flags & RADEON_IS_IGP)) {
5018 if (reset_mask & RADEON_RESET_MC)
5019 srbm_soft_reset |= SOFT_RESET_MC;
5020 }
5021
5022 if (grbm_soft_reset) {
5023 tmp = RREG32(GRBM_SOFT_RESET);
5024 tmp |= grbm_soft_reset;
5025 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5026 WREG32(GRBM_SOFT_RESET, tmp);
5027 tmp = RREG32(GRBM_SOFT_RESET);
5028
5029 udelay(50);
5030
5031 tmp &= ~grbm_soft_reset;
5032 WREG32(GRBM_SOFT_RESET, tmp);
5033 tmp = RREG32(GRBM_SOFT_RESET);
5034 }
5035
5036 if (srbm_soft_reset) {
5037 tmp = RREG32(SRBM_SOFT_RESET);
5038 tmp |= srbm_soft_reset;
5039 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5040 WREG32(SRBM_SOFT_RESET, tmp);
5041 tmp = RREG32(SRBM_SOFT_RESET);
5042
5043 udelay(50);
5044
5045 tmp &= ~srbm_soft_reset;
5046 WREG32(SRBM_SOFT_RESET, tmp);
5047 tmp = RREG32(SRBM_SOFT_RESET);
5048 }
5049
5050 /* Wait a little for things to settle down */
5051 udelay(50);
5052
5053 evergreen_mc_resume(rdev, &save);
5054 udelay(50);
5055
5056 cik_print_gpu_status_regs(rdev);
5057}
5058
5059struct kv_reset_save_regs {
5060 u32 gmcon_reng_execute;
5061 u32 gmcon_misc;
5062 u32 gmcon_misc3;
5063};
5064
5065static void kv_save_regs_for_reset(struct radeon_device *rdev,
5066 struct kv_reset_save_regs *save)
5067{
5068 save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5069 save->gmcon_misc = RREG32(GMCON_MISC);
5070 save->gmcon_misc3 = RREG32(GMCON_MISC3);
5071
5072 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5073 WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5074 STCTRL_STUTTER_EN));
5075}
5076
5077static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5078 struct kv_reset_save_regs *save)
5079{
5080 int i;
5081
5082 WREG32(GMCON_PGFSM_WRITE, 0);
5083 WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5084
5085 for (i = 0; i < 5; i++)
5086 WREG32(GMCON_PGFSM_WRITE, 0);
5087
5088 WREG32(GMCON_PGFSM_WRITE, 0);
5089 WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5090
5091 for (i = 0; i < 5; i++)
5092 WREG32(GMCON_PGFSM_WRITE, 0);
5093
5094 WREG32(GMCON_PGFSM_WRITE, 0x210000);
5095 WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5096
5097 for (i = 0; i < 5; i++)
5098 WREG32(GMCON_PGFSM_WRITE, 0);
5099
5100 WREG32(GMCON_PGFSM_WRITE, 0x21003);
5101 WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5102
5103 for (i = 0; i < 5; i++)
5104 WREG32(GMCON_PGFSM_WRITE, 0);
5105
5106 WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5107 WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5108
5109 for (i = 0; i < 5; i++)
5110 WREG32(GMCON_PGFSM_WRITE, 0);
5111
5112 WREG32(GMCON_PGFSM_WRITE, 0);
5113 WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5114
5115 for (i = 0; i < 5; i++)
5116 WREG32(GMCON_PGFSM_WRITE, 0);
5117
5118 WREG32(GMCON_PGFSM_WRITE, 0x420000);
5119 WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5120
5121 for (i = 0; i < 5; i++)
5122 WREG32(GMCON_PGFSM_WRITE, 0);
5123
5124 WREG32(GMCON_PGFSM_WRITE, 0x120202);
5125 WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5126
5127 for (i = 0; i < 5; i++)
5128 WREG32(GMCON_PGFSM_WRITE, 0);
5129
5130 WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5131 WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5132
5133 for (i = 0; i < 5; i++)
5134 WREG32(GMCON_PGFSM_WRITE, 0);
5135
5136 WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5137 WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5138
5139 for (i = 0; i < 5; i++)
5140 WREG32(GMCON_PGFSM_WRITE, 0);
5141
5142 WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5143 WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5144
5145 WREG32(GMCON_MISC3, save->gmcon_misc3);
5146 WREG32(GMCON_MISC, save->gmcon_misc);
5147 WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5148}
5149
5150static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5151{
5152 struct evergreen_mc_save save;
5153 struct kv_reset_save_regs kv_save = { 0 };
5154 u32 tmp, i;
5155
5156 dev_info(rdev->dev, "GPU pci config reset\n");
5157
5158 /* disable dpm? */
5159
5160 /* disable cg/pg */
5161 cik_fini_pg(rdev);
5162 cik_fini_cg(rdev);
5163
5164 /* Disable GFX parsing/prefetching */
5165 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5166
5167 /* Disable MEC parsing/prefetching */
5168 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5169
5170 /* sdma0 */
5171 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5172 tmp |= SDMA_HALT;
5173 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5174 /* sdma1 */
5175 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5176 tmp |= SDMA_HALT;
5177 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5178 /* XXX other engines? */
5179
5180 /* halt the rlc, disable cp internal ints */
5181 cik_rlc_stop(rdev);
5182
5183 udelay(50);
5184
5185 /* disable mem access */
5186 evergreen_mc_stop(rdev, &save);
5187 if (evergreen_mc_wait_for_idle(rdev)) {
5188 dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5189 }
5190
5191 if (rdev->flags & RADEON_IS_IGP)
5192 kv_save_regs_for_reset(rdev, &kv_save);
5193
5194 /* disable BM */
5195 pci_clear_master(rdev->pdev);
5196 /* reset */
5197 radeon_pci_config_reset(rdev);
5198
5199 udelay(100);
5200
5201 /* wait for asic to come out of reset */
5202 for (i = 0; i < rdev->usec_timeout; i++) {
5203 if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5204 break;
5205 udelay(1);
5206 }
5207
5208 /* does asic init need to be run first??? */
5209 if (rdev->flags & RADEON_IS_IGP)
5210 kv_restore_regs_for_reset(rdev, &kv_save);
5211}
5212
5213/**
5214 * cik_asic_reset - soft reset GPU
5215 *
5216 * @rdev: radeon_device pointer
5217 * @hard: force hard reset
5218 *
5219 * Look up which blocks are hung and attempt
5220 * to reset them.
5221 * Returns 0 for success.
5222 */
5223int cik_asic_reset(struct radeon_device *rdev, bool hard)
5224{
5225 u32 reset_mask;
5226
5227 if (hard) {
5228 cik_gpu_pci_config_reset(rdev);
5229 return 0;
5230 }
5231
5232 reset_mask = cik_gpu_check_soft_reset(rdev);
5233
5234 if (reset_mask)
5235 r600_set_bios_scratch_engine_hung(rdev, true);
5236
5237 /* try soft reset */
5238 cik_gpu_soft_reset(rdev, reset_mask);
5239
5240 reset_mask = cik_gpu_check_soft_reset(rdev);
5241
5242 /* try pci config reset */
5243 if (reset_mask && radeon_hard_reset)
5244 cik_gpu_pci_config_reset(rdev);
5245
5246 reset_mask = cik_gpu_check_soft_reset(rdev);
5247
5248 if (!reset_mask)
5249 r600_set_bios_scratch_engine_hung(rdev, false);
5250
5251 return 0;
5252}
5253
5254/**
5255 * cik_gfx_is_lockup - check if the 3D engine is locked up
5256 *
5257 * @rdev: radeon_device pointer
5258 * @ring: radeon_ring structure holding ring information
5259 *
5260 * Check if the 3D engine is locked up (CIK).
5261 * Returns true if the engine is locked, false if not.
5262 */
5263bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5264{
5265 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5266
5267 if (!(reset_mask & (RADEON_RESET_GFX |
5268 RADEON_RESET_COMPUTE |
5269 RADEON_RESET_CP))) {
5270 radeon_ring_lockup_update(rdev, ring);
5271 return false;
5272 }
5273 return radeon_ring_test_lockup(rdev, ring);
5274}
5275
5276/* MC */
5277/**
5278 * cik_mc_program - program the GPU memory controller
5279 *
5280 * @rdev: radeon_device pointer
5281 *
5282 * Set the location of vram, gart, and AGP in the GPU's
5283 * physical address space (CIK).
5284 */
5285static void cik_mc_program(struct radeon_device *rdev)
5286{
5287 struct evergreen_mc_save save;
5288 u32 tmp;
5289 int i, j;
5290
5291 /* Initialize HDP */
5292 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5293 WREG32((0x2c14 + j), 0x00000000);
5294 WREG32((0x2c18 + j), 0x00000000);
5295 WREG32((0x2c1c + j), 0x00000000);
5296 WREG32((0x2c20 + j), 0x00000000);
5297 WREG32((0x2c24 + j), 0x00000000);
5298 }
5299 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5300
5301 evergreen_mc_stop(rdev, &save);
5302 if (radeon_mc_wait_for_idle(rdev)) {
5303 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5304 }
5305 /* Lockout access through VGA aperture*/
5306 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5307 /* Update configuration */
5308 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5309 rdev->mc.vram_start >> 12);
5310 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5311 rdev->mc.vram_end >> 12);
5312 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5313 rdev->vram_scratch.gpu_addr >> 12);
5314 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5315 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5316 WREG32(MC_VM_FB_LOCATION, tmp);
5317 /* XXX double check these! */
5318 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5319 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5320 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5321 WREG32(MC_VM_AGP_BASE, 0);
5322 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5323 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5324 if (radeon_mc_wait_for_idle(rdev)) {
5325 dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5326 }
5327 evergreen_mc_resume(rdev, &save);
5328 /* we need to own VRAM, so turn off the VGA renderer here
5329 * to stop it overwriting our objects */
5330 rv515_vga_render_disable(rdev);
5331}
5332
5333/**
5334 * cik_mc_init - initialize the memory controller driver params
5335 *
5336 * @rdev: radeon_device pointer
5337 *
5338 * Look up the amount of vram, vram width, and decide how to place
5339 * vram and gart within the GPU's physical address space (CIK).
5340 * Returns 0 for success.
5341 */
5342static int cik_mc_init(struct radeon_device *rdev)
5343{
5344 u32 tmp;
5345 int chansize, numchan;
5346
5347 /* Get VRAM informations */
5348 rdev->mc.vram_is_ddr = true;
5349 tmp = RREG32(MC_ARB_RAMCFG);
5350 if (tmp & CHANSIZE_MASK) {
5351 chansize = 64;
5352 } else {
5353 chansize = 32;
5354 }
5355 tmp = RREG32(MC_SHARED_CHMAP);
5356 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5357 case 0:
5358 default:
5359 numchan = 1;
5360 break;
5361 case 1:
5362 numchan = 2;
5363 break;
5364 case 2:
5365 numchan = 4;
5366 break;
5367 case 3:
5368 numchan = 8;
5369 break;
5370 case 4:
5371 numchan = 3;
5372 break;
5373 case 5:
5374 numchan = 6;
5375 break;
5376 case 6:
5377 numchan = 10;
5378 break;
5379 case 7:
5380 numchan = 12;
5381 break;
5382 case 8:
5383 numchan = 16;
5384 break;
5385 }
5386 rdev->mc.vram_width = numchan * chansize;
5387 /* Could aper size report 0 ? */
5388 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5389 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5390 /* size in MB on si */
5391 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5392 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5393 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5394 si_vram_gtt_location(rdev, &rdev->mc);
5395 radeon_update_bandwidth_info(rdev);
5396
5397 return 0;
5398}
5399
5400/*
5401 * GART
5402 * VMID 0 is the physical GPU addresses as used by the kernel.
5403 * VMIDs 1-15 are used for userspace clients and are handled
5404 * by the radeon vm/hsa code.
5405 */
5406/**
5407 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5408 *
5409 * @rdev: radeon_device pointer
5410 *
5411 * Flush the TLB for the VMID 0 page table (CIK).
5412 */
5413void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5414{
5415 /* flush hdp cache */
5416 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5417
5418 /* bits 0-15 are the VM contexts0-15 */
5419 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5420}
5421
5422/**
5423 * cik_pcie_gart_enable - gart enable
5424 *
5425 * @rdev: radeon_device pointer
5426 *
5427 * This sets up the TLBs, programs the page tables for VMID0,
5428 * sets up the hw for VMIDs 1-15 which are allocated on
5429 * demand, and sets up the global locations for the LDS, GDS,
5430 * and GPUVM for FSA64 clients (CIK).
5431 * Returns 0 for success, errors for failure.
5432 */
5433static int cik_pcie_gart_enable(struct radeon_device *rdev)
5434{
5435 int r, i;
5436
5437 if (rdev->gart.robj == NULL) {
5438 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5439 return -EINVAL;
5440 }
5441 r = radeon_gart_table_vram_pin(rdev);
5442 if (r)
5443 return r;
5444 /* Setup TLB control */
5445 WREG32(MC_VM_MX_L1_TLB_CNTL,
5446 (0xA << 7) |
5447 ENABLE_L1_TLB |
5448 ENABLE_L1_FRAGMENT_PROCESSING |
5449 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5450 ENABLE_ADVANCED_DRIVER_MODEL |
5451 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5452 /* Setup L2 cache */
5453 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5454 ENABLE_L2_FRAGMENT_PROCESSING |
5455 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5456 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5457 EFFECTIVE_L2_QUEUE_SIZE(7) |
5458 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5459 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5460 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5461 BANK_SELECT(4) |
5462 L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5463 /* setup context0 */
5464 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5465 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5466 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5467 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5468 (u32)(rdev->dummy_page.addr >> 12));
5469 WREG32(VM_CONTEXT0_CNTL2, 0);
5470 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5471 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5472
5473 WREG32(0x15D4, 0);
5474 WREG32(0x15D8, 0);
5475 WREG32(0x15DC, 0);
5476
5477 /* restore context1-15 */
5478 /* set vm size, must be a multiple of 4 */
5479 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5480 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5481 for (i = 1; i < 16; i++) {
5482 if (i < 8)
5483 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5484 rdev->vm_manager.saved_table_addr[i]);
5485 else
5486 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5487 rdev->vm_manager.saved_table_addr[i]);
5488 }
5489
5490 /* enable context1-15 */
5491 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5492 (u32)(rdev->dummy_page.addr >> 12));
5493 WREG32(VM_CONTEXT1_CNTL2, 4);
5494 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5495 PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5496 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5497 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5498 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5499 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5500 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5501 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5502 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5503 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5504 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5505 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5506 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5507 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5508
5509 if (rdev->family == CHIP_KAVERI) {
5510 u32 tmp = RREG32(CHUB_CONTROL);
5511 tmp &= ~BYPASS_VM;
5512 WREG32(CHUB_CONTROL, tmp);
5513 }
5514
5515 /* XXX SH_MEM regs */
5516 /* where to put LDS, scratch, GPUVM in FSA64 space */
5517 mutex_lock(&rdev->srbm_mutex);
5518 for (i = 0; i < 16; i++) {
5519 cik_srbm_select(rdev, 0, 0, 0, i);
5520 /* CP and shaders */
5521 WREG32(SH_MEM_CONFIG, SH_MEM_CONFIG_GFX_DEFAULT);
5522 WREG32(SH_MEM_APE1_BASE, 1);
5523 WREG32(SH_MEM_APE1_LIMIT, 0);
5524 WREG32(SH_MEM_BASES, 0);
5525 /* SDMA GFX */
5526 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5527 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5528 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5529 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5530 /* XXX SDMA RLC - todo */
5531 }
5532 cik_srbm_select(rdev, 0, 0, 0, 0);
5533 mutex_unlock(&rdev->srbm_mutex);
5534
5535 cik_pcie_gart_tlb_flush(rdev);
5536 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5537 (unsigned)(rdev->mc.gtt_size >> 20),
5538 (unsigned long long)rdev->gart.table_addr);
5539 rdev->gart.ready = true;
5540 return 0;
5541}
5542
5543/**
5544 * cik_pcie_gart_disable - gart disable
5545 *
5546 * @rdev: radeon_device pointer
5547 *
5548 * This disables all VM page table (CIK).
5549 */
5550static void cik_pcie_gart_disable(struct radeon_device *rdev)
5551{
5552 unsigned i;
5553
5554 for (i = 1; i < 16; ++i) {
5555 uint32_t reg;
5556 if (i < 8)
5557 reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5558 else
5559 reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5560 rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5561 }
5562
5563 /* Disable all tables */
5564 WREG32(VM_CONTEXT0_CNTL, 0);
5565 WREG32(VM_CONTEXT1_CNTL, 0);
5566 /* Setup TLB control */
5567 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5568 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5569 /* Setup L2 cache */
5570 WREG32(VM_L2_CNTL,
5571 ENABLE_L2_FRAGMENT_PROCESSING |
5572 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5573 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5574 EFFECTIVE_L2_QUEUE_SIZE(7) |
5575 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5576 WREG32(VM_L2_CNTL2, 0);
5577 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5578 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5579 radeon_gart_table_vram_unpin(rdev);
5580}
5581
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the GART table in vram and then tears down
 * the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware off first, then the table, then the generic state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5595
5596/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking, so there is nothing to parse here (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* nothing to validate in software */
	return 0;
}
5609
5610/*
5611 * vm
5612 * VMID 0 is the physical GPU addresses as used by the kernel.
5613 * VMIDs 1-15 are used for userspace clients and are handled
5614 * by the radeon vm/hsa code.
5615 */
5616/**
5617 * cik_vm_init - cik vm init callback
5618 *
5619 * @rdev: radeon_device pointer
5620 *
5621 * Inits cik specific vm parameters (number of VMs, base of vram for
5622 * VMIDs 1-15) (CIK).
5623 * Returns 0 for success.
5624 */
5625int cik_vm_init(struct radeon_device *rdev)
5626{
5627 /*
5628 * number of VMs
5629 * VMID 0 is reserved for System
5630 * radeon graphics/compute will use VMIDs 1-15
5631 */
5632 rdev->vm_manager.nvm = 16;
5633 /* base offset of vram pages */
5634 if (rdev->flags & RADEON_IS_IGP) {
5635 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5636 tmp <<= 22;
5637 rdev->vm_manager.vram_base_offset = tmp;
5638 } else
5639 rdev->vm_manager.vram_base_offset = 0;
5640
5641 return 0;
5642}
5643
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Counterpart of cik_vm_init(); currently there is nothing to
 * tear down, so this is a no-op (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
5654
5655/**
5656 * cik_vm_decode_fault - print human readable fault info
5657 *
5658 * @rdev: radeon_device pointer
5659 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5660 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5661 *
5662 * Print human readable fault information (CIK).
5663 */
5664static void cik_vm_decode_fault(struct radeon_device *rdev,
5665 u32 status, u32 addr, u32 mc_client)
5666{
5667 u32 mc_id;
5668 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5669 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5670 char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5671 (mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5672
5673 if (rdev->family == CHIP_HAWAII)
5674 mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5675 else
5676 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5677
5678 printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5679 protections, vmid, addr,
5680 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5681 block, mc_client, mc_id);
5682}
5683
5684/**
5685 * cik_vm_flush - cik vm flush using the CP
5686 *
5687 * @rdev: radeon_device pointer
5688 *
5689 * Update the page table base and flush the VM TLB
5690 * using the CP (CIK).
5691 */
5692void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5693 unsigned vm_id, uint64_t pd_addr)
5694{
5695 int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
5696
5697 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5698 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5699 WRITE_DATA_DST_SEL(0)));
5700 if (vm_id < 8) {
5701 radeon_ring_write(ring,
5702 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5703 } else {
5704 radeon_ring_write(ring,
5705 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5706 }
5707 radeon_ring_write(ring, 0);
5708 radeon_ring_write(ring, pd_addr >> 12);
5709
5710 /* update SH_MEM_* regs */
5711 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5712 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5713 WRITE_DATA_DST_SEL(0)));
5714 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5715 radeon_ring_write(ring, 0);
5716 radeon_ring_write(ring, VMID(vm_id));
5717
5718 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5719 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5720 WRITE_DATA_DST_SEL(0)));
5721 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5722 radeon_ring_write(ring, 0);
5723
5724 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5725 radeon_ring_write(ring, SH_MEM_CONFIG_GFX_DEFAULT); /* SH_MEM_CONFIG */
5726 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5727 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5728
5729 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5730 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5731 WRITE_DATA_DST_SEL(0)));
5732 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5733 radeon_ring_write(ring, 0);
5734 radeon_ring_write(ring, VMID(0));
5735
5736 /* HDP flush */
5737 cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
5738
5739 /* bits 0-15 are the VM contexts0-15 */
5740 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5741 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
5742 WRITE_DATA_DST_SEL(0)));
5743 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5744 radeon_ring_write(ring, 0);
5745 radeon_ring_write(ring, 1 << vm_id);
5746
5747 /* wait for the invalidate to complete */
5748 radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5749 radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
5750 WAIT_REG_MEM_FUNCTION(0) | /* always */
5751 WAIT_REG_MEM_ENGINE(0))); /* me */
5752 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5753 radeon_ring_write(ring, 0);
5754 radeon_ring_write(ring, 0); /* ref */
5755 radeon_ring_write(ring, 0); /* mask */
5756 radeon_ring_write(ring, 0x20); /* poll interval */
5757
5758 /* compute doesn't have PFP */
5759 if (usepfp) {
5760 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5761 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5762 radeon_ring_write(ring, 0x0);
5763 }
5764}
5765
5766/*
5767 * RLC
5768 * The RLC is a multi-purpose microengine that handles a
5769 * variety of functions, the most important of which is
5770 * the interrupt controller.
5771 */
5772static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5773 bool enable)
5774{
5775 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5776
5777 if (enable)
5778 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5779 else
5780 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5781 WREG32(CP_INT_CNTL_RING0, tmp);
5782}
5783
5784static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5785{
5786 u32 tmp;
5787
5788 tmp = RREG32(RLC_LB_CNTL);
5789 if (enable)
5790 tmp |= LOAD_BALANCE_ENABLE;
5791 else
5792 tmp &= ~LOAD_BALANCE_ENABLE;
5793 WREG32(RLC_LB_CNTL, tmp);
5794}
5795
5796static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5797{
5798 u32 i, j, k;
5799 u32 mask;
5800
5801 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5802 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5803 cik_select_se_sh(rdev, i, j);
5804 for (k = 0; k < rdev->usec_timeout; k++) {
5805 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5806 break;
5807 udelay(1);
5808 }
5809 }
5810 }
5811 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5812
5813 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5814 for (k = 0; k < rdev->usec_timeout; k++) {
5815 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5816 break;
5817 udelay(1);
5818 }
5819}
5820
5821static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5822{
5823 u32 tmp;
5824
5825 tmp = RREG32(RLC_CNTL);
5826 if (tmp != rlc)
5827 WREG32(RLC_CNTL, rlc);
5828}
5829
5830static u32 cik_halt_rlc(struct radeon_device *rdev)
5831{
5832 u32 data, orig;
5833
5834 orig = data = RREG32(RLC_CNTL);
5835
5836 if (data & RLC_ENABLE) {
5837 u32 i;
5838
5839 data &= ~RLC_ENABLE;
5840 WREG32(RLC_CNTL, data);
5841
5842 for (i = 0; i < rdev->usec_timeout; i++) {
5843 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5844 break;
5845 udelay(1);
5846 }
5847
5848 cik_wait_for_rlc_serdes(rdev);
5849 }
5850
5851 return orig;
5852}
5853
5854void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5855{
5856 u32 tmp, i, mask;
5857
5858 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5859 WREG32(RLC_GPR_REG2, tmp);
5860
5861 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5862 for (i = 0; i < rdev->usec_timeout; i++) {
5863 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5864 break;
5865 udelay(1);
5866 }
5867
5868 for (i = 0; i < rdev->usec_timeout; i++) {
5869 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5870 break;
5871 udelay(1);
5872 }
5873}
5874
5875void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5876{
5877 u32 tmp;
5878
5879 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5880 WREG32(RLC_GPR_REG2, tmp);
5881}
5882
5883/**
5884 * cik_rlc_stop - stop the RLC ME
5885 *
5886 * @rdev: radeon_device pointer
5887 *
5888 * Halt the RLC ME (MicroEngine) (CIK).
5889 */
5890static void cik_rlc_stop(struct radeon_device *rdev)
5891{
5892 WREG32(RLC_CNTL, 0);
5893
5894 cik_enable_gui_idle_interrupt(rdev, false);
5895
5896 cik_wait_for_rlc_serdes(rdev);
5897}
5898
5899/**
5900 * cik_rlc_start - start the RLC ME
5901 *
5902 * @rdev: radeon_device pointer
5903 *
5904 * Unhalt the RLC ME (MicroEngine) (CIK).
5905 */
5906static void cik_rlc_start(struct radeon_device *rdev)
5907{
5908 WREG32(RLC_CNTL, RLC_ENABLE);
5909
5910 cik_enable_gui_idle_interrupt(rdev, true);
5911
5912 udelay(50);
5913}
5914
5915/**
5916 * cik_rlc_resume - setup the RLC hw
5917 *
5918 * @rdev: radeon_device pointer
5919 *
5920 * Initialize the RLC registers, load the ucode,
5921 * and start the RLC (CIK).
5922 * Returns 0 for success, -EINVAL if the ucode is not available.
5923 */
5924static int cik_rlc_resume(struct radeon_device *rdev)
5925{
5926 u32 i, size, tmp;
5927
5928 if (!rdev->rlc_fw)
5929 return -EINVAL;
5930
5931 cik_rlc_stop(rdev);
5932
5933 /* disable CG */
5934 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5935 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5936
5937 si_rlc_reset(rdev);
5938
5939 cik_init_pg(rdev);
5940
5941 cik_init_cg(rdev);
5942
5943 WREG32(RLC_LB_CNTR_INIT, 0);
5944 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5945
5946 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5947 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5948 WREG32(RLC_LB_PARAMS, 0x00600408);
5949 WREG32(RLC_LB_CNTL, 0x80000004);
5950
5951 WREG32(RLC_MC_CNTL, 0);
5952 WREG32(RLC_UCODE_CNTL, 0);
5953
5954 if (rdev->new_fw) {
5955 const struct rlc_firmware_header_v1_0 *hdr =
5956 (const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5957 const __le32 *fw_data = (const __le32 *)
5958 (rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5959
5960 radeon_ucode_print_rlc_hdr(&hdr->header);
5961
5962 size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5963 WREG32(RLC_GPM_UCODE_ADDR, 0);
5964 for (i = 0; i < size; i++)
5965 WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
5966 WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
5967 } else {
5968 const __be32 *fw_data;
5969
5970 switch (rdev->family) {
5971 case CHIP_BONAIRE:
5972 case CHIP_HAWAII:
5973 default:
5974 size = BONAIRE_RLC_UCODE_SIZE;
5975 break;
5976 case CHIP_KAVERI:
5977 size = KV_RLC_UCODE_SIZE;
5978 break;
5979 case CHIP_KABINI:
5980 size = KB_RLC_UCODE_SIZE;
5981 break;
5982 case CHIP_MULLINS:
5983 size = ML_RLC_UCODE_SIZE;
5984 break;
5985 }
5986
5987 fw_data = (const __be32 *)rdev->rlc_fw->data;
5988 WREG32(RLC_GPM_UCODE_ADDR, 0);
5989 for (i = 0; i < size; i++)
5990 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5991 WREG32(RLC_GPM_UCODE_ADDR, 0);
5992 }
5993
5994 /* XXX - find out what chips support lbpw */
5995 cik_enable_lbpw(rdev, false);
5996
5997 if (rdev->family == CHIP_BONAIRE)
5998 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5999
6000 cik_rlc_start(rdev);
6001
6002 return 0;
6003}
6004
6005static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6006{
6007 u32 data, orig, tmp, tmp2;
6008
6009 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6010
6011 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6012 cik_enable_gui_idle_interrupt(rdev, true);
6013
6014 tmp = cik_halt_rlc(rdev);
6015
6016 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6017 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6018 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6019 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6020 WREG32(RLC_SERDES_WR_CTRL, tmp2);
6021
6022 cik_update_rlc(rdev, tmp);
6023
6024 data |= CGCG_EN | CGLS_EN;
6025 } else {
6026 cik_enable_gui_idle_interrupt(rdev, false);
6027
6028 RREG32(CB_CGTT_SCLK_CTRL);
6029 RREG32(CB_CGTT_SCLK_CTRL);
6030 RREG32(CB_CGTT_SCLK_CTRL);
6031 RREG32(CB_CGTT_SCLK_CTRL);
6032
6033 data &= ~(CGCG_EN | CGLS_EN);
6034 }
6035
6036 if (orig != data)
6037 WREG32(RLC_CGCG_CGLS_CTRL, data);
6038
6039}
6040
6041static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6042{
6043 u32 data, orig, tmp = 0;
6044
6045 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6046 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6047 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6048 orig = data = RREG32(CP_MEM_SLP_CNTL);
6049 data |= CP_MEM_LS_EN;
6050 if (orig != data)
6051 WREG32(CP_MEM_SLP_CNTL, data);
6052 }
6053 }
6054
6055 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6056 data |= 0x00000001;
6057 data &= 0xfffffffd;
6058 if (orig != data)
6059 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6060
6061 tmp = cik_halt_rlc(rdev);
6062
6063 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6064 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6065 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6066 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6067 WREG32(RLC_SERDES_WR_CTRL, data);
6068
6069 cik_update_rlc(rdev, tmp);
6070
6071 if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6072 orig = data = RREG32(CGTS_SM_CTRL_REG);
6073 data &= ~SM_MODE_MASK;
6074 data |= SM_MODE(0x2);
6075 data |= SM_MODE_ENABLE;
6076 data &= ~CGTS_OVERRIDE;
6077 if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6078 (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6079 data &= ~CGTS_LS_OVERRIDE;
6080 data &= ~ON_MONITOR_ADD_MASK;
6081 data |= ON_MONITOR_ADD_EN;
6082 data |= ON_MONITOR_ADD(0x96);
6083 if (orig != data)
6084 WREG32(CGTS_SM_CTRL_REG, data);
6085 }
6086 } else {
6087 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6088 data |= 0x00000003;
6089 if (orig != data)
6090 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6091
6092 data = RREG32(RLC_MEM_SLP_CNTL);
6093 if (data & RLC_MEM_LS_EN) {
6094 data &= ~RLC_MEM_LS_EN;
6095 WREG32(RLC_MEM_SLP_CNTL, data);
6096 }
6097
6098 data = RREG32(CP_MEM_SLP_CNTL);
6099 if (data & CP_MEM_LS_EN) {
6100 data &= ~CP_MEM_LS_EN;
6101 WREG32(CP_MEM_SLP_CNTL, data);
6102 }
6103
6104 orig = data = RREG32(CGTS_SM_CTRL_REG);
6105 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6106 if (orig != data)
6107 WREG32(CGTS_SM_CTRL_REG, data);
6108
6109 tmp = cik_halt_rlc(rdev);
6110
6111 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6112 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6113 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6114 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6115 WREG32(RLC_SERDES_WR_CTRL, data);
6116
6117 cik_update_rlc(rdev, tmp);
6118 }
6119}
6120
6121static const u32 mc_cg_registers[] =
6122{
6123 MC_HUB_MISC_HUB_CG,
6124 MC_HUB_MISC_SIP_CG,
6125 MC_HUB_MISC_VM_CG,
6126 MC_XPB_CLK_GAT,
6127 ATC_MISC_CG,
6128 MC_CITF_MISC_WR_CG,
6129 MC_CITF_MISC_RD_CG,
6130 MC_CITF_MISC_VM_CG,
6131 VM_L2_CG,
6132};
6133
6134static void cik_enable_mc_ls(struct radeon_device *rdev,
6135 bool enable)
6136{
6137 int i;
6138 u32 orig, data;
6139
6140 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6141 orig = data = RREG32(mc_cg_registers[i]);
6142 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6143 data |= MC_LS_ENABLE;
6144 else
6145 data &= ~MC_LS_ENABLE;
6146 if (data != orig)
6147 WREG32(mc_cg_registers[i], data);
6148 }
6149}
6150
6151static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6152 bool enable)
6153{
6154 int i;
6155 u32 orig, data;
6156
6157 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6158 orig = data = RREG32(mc_cg_registers[i]);
6159 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6160 data |= MC_CG_ENABLE;
6161 else
6162 data &= ~MC_CG_ENABLE;
6163 if (data != orig)
6164 WREG32(mc_cg_registers[i], data);
6165 }
6166}
6167
6168static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6169 bool enable)
6170{
6171 u32 orig, data;
6172
6173 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6174 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6175 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6176 } else {
6177 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6178 data |= 0xff000000;
6179 if (data != orig)
6180 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6181
6182 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6183 data |= 0xff000000;
6184 if (data != orig)
6185 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6186 }
6187}
6188
6189static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6190 bool enable)
6191{
6192 u32 orig, data;
6193
6194 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6195 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6196 data |= 0x100;
6197 if (orig != data)
6198 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6199
6200 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6201 data |= 0x100;
6202 if (orig != data)
6203 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6204 } else {
6205 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6206 data &= ~0x100;
6207 if (orig != data)
6208 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6209
6210 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6211 data &= ~0x100;
6212 if (orig != data)
6213 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6214 }
6215}
6216
6217static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6218 bool enable)
6219{
6220 u32 orig, data;
6221
6222 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6223 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6224 data = 0xfff;
6225 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6226
6227 orig = data = RREG32(UVD_CGC_CTRL);
6228 data |= DCM;
6229 if (orig != data)
6230 WREG32(UVD_CGC_CTRL, data);
6231 } else {
6232 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6233 data &= ~0xfff;
6234 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6235
6236 orig = data = RREG32(UVD_CGC_CTRL);
6237 data &= ~DCM;
6238 if (orig != data)
6239 WREG32(UVD_CGC_CTRL, data);
6240 }
6241}
6242
6243static void cik_enable_bif_mgls(struct radeon_device *rdev,
6244 bool enable)
6245{
6246 u32 orig, data;
6247
6248 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6249
6250 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6251 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6252 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6253 else
6254 data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6255 REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6256
6257 if (orig != data)
6258 WREG32_PCIE_PORT(PCIE_CNTL2, data);
6259}
6260
6261static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6262 bool enable)
6263{
6264 u32 orig, data;
6265
6266 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6267
6268 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6269 data &= ~CLOCK_GATING_DIS;
6270 else
6271 data |= CLOCK_GATING_DIS;
6272
6273 if (orig != data)
6274 WREG32(HDP_HOST_PATH_CNTL, data);
6275}
6276
6277static void cik_enable_hdp_ls(struct radeon_device *rdev,
6278 bool enable)
6279{
6280 u32 orig, data;
6281
6282 orig = data = RREG32(HDP_MEM_POWER_LS);
6283
6284 if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6285 data |= HDP_LS_ENABLE;
6286 else
6287 data &= ~HDP_LS_ENABLE;
6288
6289 if (orig != data)
6290 WREG32(HDP_MEM_POWER_LS, data);
6291}
6292
6293void cik_update_cg(struct radeon_device *rdev,
6294 u32 block, bool enable)
6295{
6296
6297 if (block & RADEON_CG_BLOCK_GFX) {
6298 cik_enable_gui_idle_interrupt(rdev, false);
6299 /* order matters! */
6300 if (enable) {
6301 cik_enable_mgcg(rdev, true);
6302 cik_enable_cgcg(rdev, true);
6303 } else {
6304 cik_enable_cgcg(rdev, false);
6305 cik_enable_mgcg(rdev, false);
6306 }
6307 cik_enable_gui_idle_interrupt(rdev, true);
6308 }
6309
6310 if (block & RADEON_CG_BLOCK_MC) {
6311 if (!(rdev->flags & RADEON_IS_IGP)) {
6312 cik_enable_mc_mgcg(rdev, enable);
6313 cik_enable_mc_ls(rdev, enable);
6314 }
6315 }
6316
6317 if (block & RADEON_CG_BLOCK_SDMA) {
6318 cik_enable_sdma_mgcg(rdev, enable);
6319 cik_enable_sdma_mgls(rdev, enable);
6320 }
6321
6322 if (block & RADEON_CG_BLOCK_BIF) {
6323 cik_enable_bif_mgls(rdev, enable);
6324 }
6325
6326 if (block & RADEON_CG_BLOCK_UVD) {
6327 if (rdev->has_uvd)
6328 cik_enable_uvd_mgcg(rdev, enable);
6329 }
6330
6331 if (block & RADEON_CG_BLOCK_HDP) {
6332 cik_enable_hdp_mgcg(rdev, enable);
6333 cik_enable_hdp_ls(rdev, enable);
6334 }
6335
6336 if (block & RADEON_CG_BLOCK_VCE) {
6337 vce_v2_0_enable_mgcg(rdev, enable);
6338 }
6339}
6340
6341static void cik_init_cg(struct radeon_device *rdev)
6342{
6343
6344 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6345
6346 if (rdev->has_uvd)
6347 si_init_uvd_internal_cg(rdev);
6348
6349 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6350 RADEON_CG_BLOCK_SDMA |
6351 RADEON_CG_BLOCK_BIF |
6352 RADEON_CG_BLOCK_UVD |
6353 RADEON_CG_BLOCK_HDP), true);
6354}
6355
6356static void cik_fini_cg(struct radeon_device *rdev)
6357{
6358 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6359 RADEON_CG_BLOCK_SDMA |
6360 RADEON_CG_BLOCK_BIF |
6361 RADEON_CG_BLOCK_UVD |
6362 RADEON_CG_BLOCK_HDP), false);
6363
6364 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6365}
6366
6367static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6368 bool enable)
6369{
6370 u32 data, orig;
6371
6372 orig = data = RREG32(RLC_PG_CNTL);
6373 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6374 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6375 else
6376 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6377 if (orig != data)
6378 WREG32(RLC_PG_CNTL, data);
6379}
6380
6381static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6382 bool enable)
6383{
6384 u32 data, orig;
6385
6386 orig = data = RREG32(RLC_PG_CNTL);
6387 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6388 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6389 else
6390 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6391 if (orig != data)
6392 WREG32(RLC_PG_CNTL, data);
6393}
6394
6395static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6396{
6397 u32 data, orig;
6398
6399 orig = data = RREG32(RLC_PG_CNTL);
6400 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6401 data &= ~DISABLE_CP_PG;
6402 else
6403 data |= DISABLE_CP_PG;
6404 if (orig != data)
6405 WREG32(RLC_PG_CNTL, data);
6406}
6407
6408static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6409{
6410 u32 data, orig;
6411
6412 orig = data = RREG32(RLC_PG_CNTL);
6413 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6414 data &= ~DISABLE_GDS_PG;
6415 else
6416 data |= DISABLE_GDS_PG;
6417 if (orig != data)
6418 WREG32(RLC_PG_CNTL, data);
6419}
6420
/*
 * Legacy (non-new_fw) firmware layout used by cik_init_cp_pg_table():
 * number of 32-bit words copied per engine, and the 32-bit word index at
 * which the table starts in the CE/PFP/ME images and in the MEC image.
 */
#define CP_ME_TABLE_SIZE	96
#define CP_ME_TABLE_OFFSET	2048
#define CP_MEC_TABLE_OFFSET	4096
6424
6425void cik_init_cp_pg_table(struct radeon_device *rdev)
6426{
6427 volatile u32 *dst_ptr;
6428 int me, i, max_me = 4;
6429 u32 bo_offset = 0;
6430 u32 table_offset, table_size;
6431
6432 if (rdev->family == CHIP_KAVERI)
6433 max_me = 5;
6434
6435 if (rdev->rlc.cp_table_ptr == NULL)
6436 return;
6437
6438 /* write the cp table buffer */
6439 dst_ptr = rdev->rlc.cp_table_ptr;
6440 for (me = 0; me < max_me; me++) {
6441 if (rdev->new_fw) {
6442 const __le32 *fw_data;
6443 const struct gfx_firmware_header_v1_0 *hdr;
6444
6445 if (me == 0) {
6446 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6447 fw_data = (const __le32 *)
6448 (rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6449 table_offset = le32_to_cpu(hdr->jt_offset);
6450 table_size = le32_to_cpu(hdr->jt_size);
6451 } else if (me == 1) {
6452 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6453 fw_data = (const __le32 *)
6454 (rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6455 table_offset = le32_to_cpu(hdr->jt_offset);
6456 table_size = le32_to_cpu(hdr->jt_size);
6457 } else if (me == 2) {
6458 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6459 fw_data = (const __le32 *)
6460 (rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6461 table_offset = le32_to_cpu(hdr->jt_offset);
6462 table_size = le32_to_cpu(hdr->jt_size);
6463 } else if (me == 3) {
6464 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6465 fw_data = (const __le32 *)
6466 (rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6467 table_offset = le32_to_cpu(hdr->jt_offset);
6468 table_size = le32_to_cpu(hdr->jt_size);
6469 } else {
6470 hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6471 fw_data = (const __le32 *)
6472 (rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6473 table_offset = le32_to_cpu(hdr->jt_offset);
6474 table_size = le32_to_cpu(hdr->jt_size);
6475 }
6476
6477 for (i = 0; i < table_size; i ++) {
6478 dst_ptr[bo_offset + i] =
6479 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6480 }
6481 bo_offset += table_size;
6482 } else {
6483 const __be32 *fw_data;
6484 table_size = CP_ME_TABLE_SIZE;
6485
6486 if (me == 0) {
6487 fw_data = (const __be32 *)rdev->ce_fw->data;
6488 table_offset = CP_ME_TABLE_OFFSET;
6489 } else if (me == 1) {
6490 fw_data = (const __be32 *)rdev->pfp_fw->data;
6491 table_offset = CP_ME_TABLE_OFFSET;
6492 } else if (me == 2) {
6493 fw_data = (const __be32 *)rdev->me_fw->data;
6494 table_offset = CP_ME_TABLE_OFFSET;
6495 } else {
6496 fw_data = (const __be32 *)rdev->mec_fw->data;
6497 table_offset = CP_MEC_TABLE_OFFSET;
6498 }
6499
6500 for (i = 0; i < table_size; i ++) {
6501 dst_ptr[bo_offset + i] =
6502 cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6503 }
6504 bo_offset += table_size;
6505 }
6506 }
6507}
6508
6509static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6510 bool enable)
6511{
6512 u32 data, orig;
6513
6514 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6515 orig = data = RREG32(RLC_PG_CNTL);
6516 data |= GFX_PG_ENABLE;
6517 if (orig != data)
6518 WREG32(RLC_PG_CNTL, data);
6519
6520 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6521 data |= AUTO_PG_EN;
6522 if (orig != data)
6523 WREG32(RLC_AUTO_PG_CTRL, data);
6524 } else {
6525 orig = data = RREG32(RLC_PG_CNTL);
6526 data &= ~GFX_PG_ENABLE;
6527 if (orig != data)
6528 WREG32(RLC_PG_CNTL, data);
6529
6530 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6531 data &= ~AUTO_PG_EN;
6532 if (orig != data)
6533 WREG32(RLC_AUTO_PG_CTRL, data);
6534
6535 data = RREG32(DB_RENDER_CONTROL);
6536 }
6537}
6538
6539static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6540{
6541 u32 mask = 0, tmp, tmp1;
6542 int i;
6543
6544 cik_select_se_sh(rdev, se, sh);
6545 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6546 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6547 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6548
6549 tmp &= 0xffff0000;
6550
6551 tmp |= tmp1;
6552 tmp >>= 16;
6553
6554 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6555 mask <<= 1;
6556 mask |= 1;
6557 }
6558
6559 return (~tmp) & mask;
6560}
6561
6562static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6563{
6564 u32 i, j, k, active_cu_number = 0;
6565 u32 mask, counter, cu_bitmap;
6566 u32 tmp = 0;
6567
6568 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6569 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6570 mask = 1;
6571 cu_bitmap = 0;
6572 counter = 0;
6573 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6574 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6575 if (counter < 2)
6576 cu_bitmap |= mask;
6577 counter ++;
6578 }
6579 mask <<= 1;
6580 }
6581
6582 active_cu_number += counter;
6583 tmp |= (cu_bitmap << (i * 16 + j * 8));
6584 }
6585 }
6586
6587 WREG32(RLC_PG_AO_CU_MASK, tmp);
6588
6589 tmp = RREG32(RLC_MAX_PG_CU);
6590 tmp &= ~MAX_PU_CU_MASK;
6591 tmp |= MAX_PU_CU(active_cu_number);
6592 WREG32(RLC_MAX_PG_CU, tmp);
6593}
6594
6595static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6596 bool enable)
6597{
6598 u32 data, orig;
6599
6600 orig = data = RREG32(RLC_PG_CNTL);
6601 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6602 data |= STATIC_PER_CU_PG_ENABLE;
6603 else
6604 data &= ~STATIC_PER_CU_PG_ENABLE;
6605 if (orig != data)
6606 WREG32(RLC_PG_CNTL, data);
6607}
6608
6609static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6610 bool enable)
6611{
6612 u32 data, orig;
6613
6614 orig = data = RREG32(RLC_PG_CNTL);
6615 if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6616 data |= DYN_PER_CU_PG_ENABLE;
6617 else
6618 data &= ~DYN_PER_CU_PG_ENABLE;
6619 if (orig != data)
6620 WREG32(RLC_PG_CNTL, data);
6621}
6622
/*
 * Values written to RLC_GPM_SCRATCH_ADDR by cik_init_gfx_cgpg() before it
 * streams the save/restore register list and the clear state descriptor
 * through RLC_GPM_SCRATCH_DATA.
 */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET	0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET	0x3D
6625
6626static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6627{
6628 u32 data, orig;
6629 u32 i;
6630
6631 if (rdev->rlc.cs_data) {
6632 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6633 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6634 WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6635 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6636 } else {
6637 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6638 for (i = 0; i < 3; i++)
6639 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6640 }
6641 if (rdev->rlc.reg_list) {
6642 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6643 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6644 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6645 }
6646
6647 orig = data = RREG32(RLC_PG_CNTL);
6648 data |= GFX_PG_SRC;
6649 if (orig != data)
6650 WREG32(RLC_PG_CNTL, data);
6651
6652 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6653 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6654
6655 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6656 data &= ~IDLE_POLL_COUNT_MASK;
6657 data |= IDLE_POLL_COUNT(0x60);
6658 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6659
6660 data = 0x10101010;
6661 WREG32(RLC_PG_DELAY, data);
6662
6663 data = RREG32(RLC_PG_DELAY_2);
6664 data &= ~0xff;
6665 data |= 0x3;
6666 WREG32(RLC_PG_DELAY_2, data);
6667
6668 data = RREG32(RLC_AUTO_PG_CTRL);
6669 data &= ~GRBM_REG_SGIT_MASK;
6670 data |= GRBM_REG_SGIT(0x700);
6671 WREG32(RLC_AUTO_PG_CTRL, data);
6672
6673}
6674
6675static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6676{
6677 cik_enable_gfx_cgpg(rdev, enable);
6678 cik_enable_gfx_static_mgpg(rdev, enable);
6679 cik_enable_gfx_dynamic_mgpg(rdev, enable);
6680}
6681
6682u32 cik_get_csb_size(struct radeon_device *rdev)
6683{
6684 u32 count = 0;
6685 const struct cs_section_def *sect = NULL;
6686 const struct cs_extent_def *ext = NULL;
6687
6688 if (rdev->rlc.cs_data == NULL)
6689 return 0;
6690
6691 /* begin clear state */
6692 count += 2;
6693 /* context control state */
6694 count += 3;
6695
6696 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6697 for (ext = sect->section; ext->extent != NULL; ++ext) {
6698 if (sect->id == SECT_CONTEXT)
6699 count += 2 + ext->reg_count;
6700 else
6701 return 0;
6702 }
6703 }
6704 /* pa_sc_raster_config/pa_sc_raster_config1 */
6705 count += 4;
6706 /* end clear state */
6707 count += 2;
6708 /* clear state */
6709 count += 2;
6710
6711 return count;
6712}
6713
6714void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6715{
6716 u32 count = 0, i;
6717 const struct cs_section_def *sect = NULL;
6718 const struct cs_extent_def *ext = NULL;
6719
6720 if (rdev->rlc.cs_data == NULL)
6721 return;
6722 if (buffer == NULL)
6723 return;
6724
6725 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6726 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6727
6728 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6729 buffer[count++] = cpu_to_le32(0x80000000);
6730 buffer[count++] = cpu_to_le32(0x80000000);
6731
6732 for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6733 for (ext = sect->section; ext->extent != NULL; ++ext) {
6734 if (sect->id == SECT_CONTEXT) {
6735 buffer[count++] =
6736 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6737 buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6738 for (i = 0; i < ext->reg_count; i++)
6739 buffer[count++] = cpu_to_le32(ext->extent[i]);
6740 } else {
6741 return;
6742 }
6743 }
6744 }
6745
6746 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6747 buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6748 switch (rdev->family) {
6749 case CHIP_BONAIRE:
6750 buffer[count++] = cpu_to_le32(0x16000012);
6751 buffer[count++] = cpu_to_le32(0x00000000);
6752 break;
6753 case CHIP_KAVERI:
6754 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6755 buffer[count++] = cpu_to_le32(0x00000000);
6756 break;
6757 case CHIP_KABINI:
6758 case CHIP_MULLINS:
6759 buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6760 buffer[count++] = cpu_to_le32(0x00000000);
6761 break;
6762 case CHIP_HAWAII:
6763 buffer[count++] = cpu_to_le32(0x3a00161a);
6764 buffer[count++] = cpu_to_le32(0x0000002e);
6765 break;
6766 default:
6767 buffer[count++] = cpu_to_le32(0x00000000);
6768 buffer[count++] = cpu_to_le32(0x00000000);
6769 break;
6770 }
6771
6772 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6773 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6774
6775 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6776 buffer[count++] = cpu_to_le32(0);
6777}
6778
6779static void cik_init_pg(struct radeon_device *rdev)
6780{
6781 if (rdev->pg_flags) {
6782 cik_enable_sck_slowdown_on_pu(rdev, true);
6783 cik_enable_sck_slowdown_on_pd(rdev, true);
6784 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6785 cik_init_gfx_cgpg(rdev);
6786 cik_enable_cp_pg(rdev, true);
6787 cik_enable_gds_pg(rdev, true);
6788 }
6789 cik_init_ao_cu_mask(rdev);
6790 cik_update_gfx_pg(rdev, true);
6791 }
6792}
6793
6794static void cik_fini_pg(struct radeon_device *rdev)
6795{
6796 if (rdev->pg_flags) {
6797 cik_update_gfx_pg(rdev, false);
6798 if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6799 cik_enable_cp_pg(rdev, false);
6800 cik_enable_gds_pg(rdev, false);
6801 }
6802 }
6803}
6804
6805/*
6806 * Interrupts
6807 * Starting with r6xx, interrupts are handled via a ring buffer.
6808 * Ring buffers are areas of GPU accessible memory that the GPU
6809 * writes interrupt vectors into and the host reads vectors out of.
6810 * There is a rptr (read pointer) that determines where the
6811 * host is currently reading, and a wptr (write pointer)
6812 * which determines where the GPU has written. When the
6813 * pointers are equal, the ring is idle. When the GPU
6814 * writes vectors to the ring buffer, it increments the
6815 * wptr. When there is an interrupt, the host then starts
6816 * fetching commands and processing them until the pointers are
6817 * equal again at which point it updates the rptr.
6818 */
6819
6820/**
6821 * cik_enable_interrupts - Enable the interrupt ring buffer
6822 *
6823 * @rdev: radeon_device pointer
6824 *
6825 * Enable the interrupt ring buffer (CIK).
6826 */
6827static void cik_enable_interrupts(struct radeon_device *rdev)
6828{
6829 u32 ih_cntl = RREG32(IH_CNTL);
6830 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6831
6832 ih_cntl |= ENABLE_INTR;
6833 ih_rb_cntl |= IH_RB_ENABLE;
6834 WREG32(IH_CNTL, ih_cntl);
6835 WREG32(IH_RB_CNTL, ih_rb_cntl);
6836 rdev->ih.enabled = true;
6837}
6838
6839/**
6840 * cik_disable_interrupts - Disable the interrupt ring buffer
6841 *
6842 * @rdev: radeon_device pointer
6843 *
6844 * Disable the interrupt ring buffer (CIK).
6845 */
6846static void cik_disable_interrupts(struct radeon_device *rdev)
6847{
6848 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6849 u32 ih_cntl = RREG32(IH_CNTL);
6850
6851 ih_rb_cntl &= ~IH_RB_ENABLE;
6852 ih_cntl &= ~ENABLE_INTR;
6853 WREG32(IH_RB_CNTL, ih_rb_cntl);
6854 WREG32(IH_CNTL, ih_cntl);
6855 /* set rptr, wptr to 0 */
6856 WREG32(IH_RB_RPTR, 0);
6857 WREG32(IH_RB_WPTR, 0);
6858 rdev->ih.enabled = false;
6859 rdev->ih.rptr = 0;
6860}
6861
6862/**
6863 * cik_disable_interrupt_state - Disable all interrupt sources
6864 *
6865 * @rdev: radeon_device pointer
6866 *
6867 * Clear all interrupt enable bits used by the driver (CIK).
6868 */
6869static void cik_disable_interrupt_state(struct radeon_device *rdev)
6870{
6871 u32 tmp;
6872
6873 /* gfx ring */
6874 tmp = RREG32(CP_INT_CNTL_RING0) &
6875 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6876 WREG32(CP_INT_CNTL_RING0, tmp);
6877 /* sdma */
6878 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6879 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6880 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6881 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6882 /* compute queues */
6883 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6884 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6885 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6886 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6887 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6888 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6889 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6890 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6891 /* grbm */
6892 WREG32(GRBM_INT_CNTL, 0);
6893 /* SRBM */
6894 WREG32(SRBM_INT_CNTL, 0);
6895 /* vline/vblank, etc. */
6896 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6897 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6898 if (rdev->num_crtc >= 4) {
6899 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6900 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6901 }
6902 if (rdev->num_crtc >= 6) {
6903 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6904 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6905 }
6906 /* pflip */
6907 if (rdev->num_crtc >= 2) {
6908 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6909 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6910 }
6911 if (rdev->num_crtc >= 4) {
6912 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6913 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6914 }
6915 if (rdev->num_crtc >= 6) {
6916 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6917 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6918 }
6919
6920 /* dac hotplug */
6921 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6922
6923 /* digital hotplug */
6924 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6925 WREG32(DC_HPD1_INT_CONTROL, tmp);
6926 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6927 WREG32(DC_HPD2_INT_CONTROL, tmp);
6928 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6929 WREG32(DC_HPD3_INT_CONTROL, tmp);
6930 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6931 WREG32(DC_HPD4_INT_CONTROL, tmp);
6932 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6933 WREG32(DC_HPD5_INT_CONTROL, tmp);
6934 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6935 WREG32(DC_HPD6_INT_CONTROL, tmp);
6936
6937}
6938
6939/**
6940 * cik_irq_init - init and enable the interrupt ring
6941 *
6942 * @rdev: radeon_device pointer
6943 *
6944 * Allocate a ring buffer for the interrupt controller,
6945 * enable the RLC, disable interrupts, enable the IH
6946 * ring buffer and enable it (CIK).
6947 * Called at device load and reume.
6948 * Returns 0 for success, errors for failure.
6949 */
6950static int cik_irq_init(struct radeon_device *rdev)
6951{
6952 int ret = 0;
6953 int rb_bufsz;
6954 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6955
6956 /* allocate ring */
6957 ret = r600_ih_ring_alloc(rdev);
6958 if (ret)
6959 return ret;
6960
6961 /* disable irqs */
6962 cik_disable_interrupts(rdev);
6963
6964 /* init rlc */
6965 ret = cik_rlc_resume(rdev);
6966 if (ret) {
6967 r600_ih_ring_fini(rdev);
6968 return ret;
6969 }
6970
6971 /* setup interrupt control */
6972 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6973 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6974 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6975 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6976 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6977 */
6978 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6979 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6980 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6981 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6982
6983 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6984 rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6985
6986 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6987 IH_WPTR_OVERFLOW_CLEAR |
6988 (rb_bufsz << 1));
6989
6990 if (rdev->wb.enabled)
6991 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6992
6993 /* set the writeback address whether it's enabled or not */
6994 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6995 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6996
6997 WREG32(IH_RB_CNTL, ih_rb_cntl);
6998
6999 /* set rptr, wptr to 0 */
7000 WREG32(IH_RB_RPTR, 0);
7001 WREG32(IH_RB_WPTR, 0);
7002
7003 /* Default settings for IH_CNTL (disabled at first) */
7004 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7005 /* RPTR_REARM only works if msi's are enabled */
7006 if (rdev->msi_enabled)
7007 ih_cntl |= RPTR_REARM;
7008 WREG32(IH_CNTL, ih_cntl);
7009
7010 /* force the active interrupt state to all disabled */
7011 cik_disable_interrupt_state(rdev);
7012
7013 pci_set_master(rdev->pdev);
7014
7015 /* enable irqs */
7016 cik_enable_interrupts(rdev);
7017
7018 return ret;
7019}
7020
7021/**
7022 * cik_irq_set - enable/disable interrupt sources
7023 *
7024 * @rdev: radeon_device pointer
7025 *
7026 * Enable interrupt sources on the GPU (vblanks, hpd,
7027 * etc.) (CIK).
7028 * Returns 0 for success, errors for failure.
7029 */
7030int cik_irq_set(struct radeon_device *rdev)
7031{
7032 u32 cp_int_cntl;
7033 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
7034 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
7035 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7036 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7037 u32 grbm_int_cntl = 0;
7038 u32 dma_cntl, dma_cntl1;
7039
7040 if (!rdev->irq.installed) {
7041 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7042 return -EINVAL;
7043 }
7044 /* don't enable anything if the ih is disabled */
7045 if (!rdev->ih.enabled) {
7046 cik_disable_interrupts(rdev);
7047 /* force the active interrupt state to all disabled */
7048 cik_disable_interrupt_state(rdev);
7049 return 0;
7050 }
7051
7052 cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7053 (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7054 cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7055
7056 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7057 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7058 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7059 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7060 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7061 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7062
7063 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7064 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7065
7066 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7067 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7068 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7069 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7070 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7071 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7072 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7073 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7074
7075 /* enable CP interrupts on all rings */
7076 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7077 DRM_DEBUG("cik_irq_set: sw int gfx\n");
7078 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7079 }
7080 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7081 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7082 DRM_DEBUG("si_irq_set: sw int cp1\n");
7083 if (ring->me == 1) {
7084 switch (ring->pipe) {
7085 case 0:
7086 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7087 break;
7088 case 1:
7089 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7090 break;
7091 case 2:
7092 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7093 break;
7094 case 3:
7095 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7096 break;
7097 default:
7098 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7099 break;
7100 }
7101 } else if (ring->me == 2) {
7102 switch (ring->pipe) {
7103 case 0:
7104 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7105 break;
7106 case 1:
7107 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7108 break;
7109 case 2:
7110 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7111 break;
7112 case 3:
7113 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7114 break;
7115 default:
7116 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7117 break;
7118 }
7119 } else {
7120 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7121 }
7122 }
7123 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7124 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7125 DRM_DEBUG("si_irq_set: sw int cp2\n");
7126 if (ring->me == 1) {
7127 switch (ring->pipe) {
7128 case 0:
7129 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7130 break;
7131 case 1:
7132 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7133 break;
7134 case 2:
7135 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7136 break;
7137 case 3:
7138 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7139 break;
7140 default:
7141 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7142 break;
7143 }
7144 } else if (ring->me == 2) {
7145 switch (ring->pipe) {
7146 case 0:
7147 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7148 break;
7149 case 1:
7150 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7151 break;
7152 case 2:
7153 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7154 break;
7155 case 3:
7156 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7157 break;
7158 default:
7159 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7160 break;
7161 }
7162 } else {
7163 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7164 }
7165 }
7166
7167 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7168 DRM_DEBUG("cik_irq_set: sw int dma\n");
7169 dma_cntl |= TRAP_ENABLE;
7170 }
7171
7172 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7173 DRM_DEBUG("cik_irq_set: sw int dma1\n");
7174 dma_cntl1 |= TRAP_ENABLE;
7175 }
7176
7177 if (rdev->irq.crtc_vblank_int[0] ||
7178 atomic_read(&rdev->irq.pflip[0])) {
7179 DRM_DEBUG("cik_irq_set: vblank 0\n");
7180 crtc1 |= VBLANK_INTERRUPT_MASK;
7181 }
7182 if (rdev->irq.crtc_vblank_int[1] ||
7183 atomic_read(&rdev->irq.pflip[1])) {
7184 DRM_DEBUG("cik_irq_set: vblank 1\n");
7185 crtc2 |= VBLANK_INTERRUPT_MASK;
7186 }
7187 if (rdev->irq.crtc_vblank_int[2] ||
7188 atomic_read(&rdev->irq.pflip[2])) {
7189 DRM_DEBUG("cik_irq_set: vblank 2\n");
7190 crtc3 |= VBLANK_INTERRUPT_MASK;
7191 }
7192 if (rdev->irq.crtc_vblank_int[3] ||
7193 atomic_read(&rdev->irq.pflip[3])) {
7194 DRM_DEBUG("cik_irq_set: vblank 3\n");
7195 crtc4 |= VBLANK_INTERRUPT_MASK;
7196 }
7197 if (rdev->irq.crtc_vblank_int[4] ||
7198 atomic_read(&rdev->irq.pflip[4])) {
7199 DRM_DEBUG("cik_irq_set: vblank 4\n");
7200 crtc5 |= VBLANK_INTERRUPT_MASK;
7201 }
7202 if (rdev->irq.crtc_vblank_int[5] ||
7203 atomic_read(&rdev->irq.pflip[5])) {
7204 DRM_DEBUG("cik_irq_set: vblank 5\n");
7205 crtc6 |= VBLANK_INTERRUPT_MASK;
7206 }
7207 if (rdev->irq.hpd[0]) {
7208 DRM_DEBUG("cik_irq_set: hpd 1\n");
7209 hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7210 }
7211 if (rdev->irq.hpd[1]) {
7212 DRM_DEBUG("cik_irq_set: hpd 2\n");
7213 hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7214 }
7215 if (rdev->irq.hpd[2]) {
7216 DRM_DEBUG("cik_irq_set: hpd 3\n");
7217 hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7218 }
7219 if (rdev->irq.hpd[3]) {
7220 DRM_DEBUG("cik_irq_set: hpd 4\n");
7221 hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7222 }
7223 if (rdev->irq.hpd[4]) {
7224 DRM_DEBUG("cik_irq_set: hpd 5\n");
7225 hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7226 }
7227 if (rdev->irq.hpd[5]) {
7228 DRM_DEBUG("cik_irq_set: hpd 6\n");
7229 hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7230 }
7231
7232 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7233
7234 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7235 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7236
7237 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7238 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7239 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7240 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7241 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7242 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7243 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7244 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7245
7246 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7247
7248 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7249 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7250 if (rdev->num_crtc >= 4) {
7251 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7252 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7253 }
7254 if (rdev->num_crtc >= 6) {
7255 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7256 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7257 }
7258
7259 if (rdev->num_crtc >= 2) {
7260 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7261 GRPH_PFLIP_INT_MASK);
7262 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7263 GRPH_PFLIP_INT_MASK);
7264 }
7265 if (rdev->num_crtc >= 4) {
7266 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7267 GRPH_PFLIP_INT_MASK);
7268 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7269 GRPH_PFLIP_INT_MASK);
7270 }
7271 if (rdev->num_crtc >= 6) {
7272 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7273 GRPH_PFLIP_INT_MASK);
7274 WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7275 GRPH_PFLIP_INT_MASK);
7276 }
7277
7278 WREG32(DC_HPD1_INT_CONTROL, hpd1);
7279 WREG32(DC_HPD2_INT_CONTROL, hpd2);
7280 WREG32(DC_HPD3_INT_CONTROL, hpd3);
7281 WREG32(DC_HPD4_INT_CONTROL, hpd4);
7282 WREG32(DC_HPD5_INT_CONTROL, hpd5);
7283 WREG32(DC_HPD6_INT_CONTROL, hpd6);
7284
7285 /* posting read */
7286 RREG32(SRBM_STATUS);
7287
7288 return 0;
7289}
7290
7291/**
7292 * cik_irq_ack - ack interrupt sources
7293 *
7294 * @rdev: radeon_device pointer
7295 *
7296 * Ack interrupt sources on the GPU (vblanks, hpd,
7297 * etc.) (CIK). Certain interrupts sources are sw
7298 * generated and do not require an explicit ack.
7299 */
7300static inline void cik_irq_ack(struct radeon_device *rdev)
7301{
7302 u32 tmp;
7303
7304 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7305 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7306 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7307 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7308 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7309 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7310 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7311
7312 rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7313 EVERGREEN_CRTC0_REGISTER_OFFSET);
7314 rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7315 EVERGREEN_CRTC1_REGISTER_OFFSET);
7316 if (rdev->num_crtc >= 4) {
7317 rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7318 EVERGREEN_CRTC2_REGISTER_OFFSET);
7319 rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7320 EVERGREEN_CRTC3_REGISTER_OFFSET);
7321 }
7322 if (rdev->num_crtc >= 6) {
7323 rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7324 EVERGREEN_CRTC4_REGISTER_OFFSET);
7325 rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7326 EVERGREEN_CRTC5_REGISTER_OFFSET);
7327 }
7328
7329 if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7330 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7331 GRPH_PFLIP_INT_CLEAR);
7332 if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7333 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7334 GRPH_PFLIP_INT_CLEAR);
7335 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7336 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7337 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7338 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7339 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7340 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7341 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7342 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7343
7344 if (rdev->num_crtc >= 4) {
7345 if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7346 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7347 GRPH_PFLIP_INT_CLEAR);
7348 if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7349 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7350 GRPH_PFLIP_INT_CLEAR);
7351 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7352 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7353 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7354 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7355 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7356 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7357 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7358 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7359 }
7360
7361 if (rdev->num_crtc >= 6) {
7362 if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7363 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7364 GRPH_PFLIP_INT_CLEAR);
7365 if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7366 WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7367 GRPH_PFLIP_INT_CLEAR);
7368 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7369 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7370 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7371 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7372 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7373 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7374 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7375 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7376 }
7377
7378 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7379 tmp = RREG32(DC_HPD1_INT_CONTROL);
7380 tmp |= DC_HPDx_INT_ACK;
7381 WREG32(DC_HPD1_INT_CONTROL, tmp);
7382 }
7383 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7384 tmp = RREG32(DC_HPD2_INT_CONTROL);
7385 tmp |= DC_HPDx_INT_ACK;
7386 WREG32(DC_HPD2_INT_CONTROL, tmp);
7387 }
7388 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7389 tmp = RREG32(DC_HPD3_INT_CONTROL);
7390 tmp |= DC_HPDx_INT_ACK;
7391 WREG32(DC_HPD3_INT_CONTROL, tmp);
7392 }
7393 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7394 tmp = RREG32(DC_HPD4_INT_CONTROL);
7395 tmp |= DC_HPDx_INT_ACK;
7396 WREG32(DC_HPD4_INT_CONTROL, tmp);
7397 }
7398 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7399 tmp = RREG32(DC_HPD5_INT_CONTROL);
7400 tmp |= DC_HPDx_INT_ACK;
7401 WREG32(DC_HPD5_INT_CONTROL, tmp);
7402 }
7403 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7404 tmp = RREG32(DC_HPD6_INT_CONTROL);
7405 tmp |= DC_HPDx_INT_ACK;
7406 WREG32(DC_HPD6_INT_CONTROL, tmp);
7407 }
7408 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7409 tmp = RREG32(DC_HPD1_INT_CONTROL);
7410 tmp |= DC_HPDx_RX_INT_ACK;
7411 WREG32(DC_HPD1_INT_CONTROL, tmp);
7412 }
7413 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7414 tmp = RREG32(DC_HPD2_INT_CONTROL);
7415 tmp |= DC_HPDx_RX_INT_ACK;
7416 WREG32(DC_HPD2_INT_CONTROL, tmp);
7417 }
7418 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7419 tmp = RREG32(DC_HPD3_INT_CONTROL);
7420 tmp |= DC_HPDx_RX_INT_ACK;
7421 WREG32(DC_HPD3_INT_CONTROL, tmp);
7422 }
7423 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7424 tmp = RREG32(DC_HPD4_INT_CONTROL);
7425 tmp |= DC_HPDx_RX_INT_ACK;
7426 WREG32(DC_HPD4_INT_CONTROL, tmp);
7427 }
7428 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7429 tmp = RREG32(DC_HPD5_INT_CONTROL);
7430 tmp |= DC_HPDx_RX_INT_ACK;
7431 WREG32(DC_HPD5_INT_CONTROL, tmp);
7432 }
7433 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7434 tmp = RREG32(DC_HPD6_INT_CONTROL);
7435 tmp |= DC_HPDx_RX_INT_ACK;
7436 WREG32(DC_HPD6_INT_CONTROL, tmp);
7437 }
7438}
7439
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Turn interrupts off on the hw (CIK): disable them, give
 * in-flight interrupts a moment to arrive, ack whatever is
 * pending and finally reset the interrupt enable state.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);

	/* Wait 1 ms before acknowledging so late irqs are caught too. */
	mdelay(1);
	cik_irq_ack(rdev);

	cik_disable_interrupt_state(rdev);
}
7455
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* Interrupts are quiesced first, the RLC is stopped afterwards. */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7469
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce interrupts on the hw via the suspend path and then
 * release the IH ring buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* Same shutdown sequence as suspend ... */
	cik_irq_suspend(rdev);

	/* ... followed by freeing the IH ring itself. */
	r600_ih_ring_fini(rdev);
}
7484
7485/**
7486 * cik_get_ih_wptr - get the IH ring buffer wptr
7487 *
7488 * @rdev: radeon_device pointer
7489 *
7490 * Get the IH ring buffer wptr from either the register
7491 * or the writeback memory buffer (CIK). Also check for
7492 * ring buffer overflow and deal with it.
7493 * Used by cik_irq_process().
7494 * Returns the value of the wptr.
7495 */
7496static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7497{
7498 u32 wptr, tmp;
7499
7500 if (rdev->wb.enabled)
7501 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7502 else
7503 wptr = RREG32(IH_RB_WPTR);
7504
7505 if (wptr & RB_OVERFLOW) {
7506 wptr &= ~RB_OVERFLOW;
7507 /* When a ring buffer overflow happen start parsing interrupt
7508 * from the last not overwritten vector (wptr + 16). Hopefully
7509 * this should allow us to catchup.
7510 */
7511 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7512 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7513 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7514 tmp = RREG32(IH_RB_CNTL);
7515 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7516 WREG32(IH_RB_CNTL, tmp);
7517 }
7518 return (wptr & rdev->ih.ptr_mask);
7519}
7520
7521/* CIK IV Ring
7522 * Each IV ring entry is 128 bits:
7523 * [7:0] - interrupt source id
7524 * [31:8] - reserved
7525 * [59:32] - interrupt source data
7526 * [63:60] - reserved
7527 * [71:64] - RINGID
7528 * CP:
7529 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7530 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7531 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7532 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7533 * PIPE_ID - ME0 0=3D
7534 * - ME1&2 compute dispatcher (4 pipes each)
7535 * SDMA:
7536 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7537 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7538 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7539 * [79:72] - VMID
7540 * [95:80] - PASID
7541 * [127:96] - reserved
7542 */
7543/**
7544 * cik_irq_process - interrupt handler
7545 *
7546 * @rdev: radeon_device pointer
7547 *
7548 * Interrupt hander (CIK). Walk the IH ring,
7549 * ack interrupts and schedule work to handle
7550 * interrupt events.
7551 * Returns irq process return code.
7552 */
7553int cik_irq_process(struct radeon_device *rdev)
7554{
7555 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7556 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7557 u32 wptr;
7558 u32 rptr;
7559 u32 src_id, src_data, ring_id;
7560 u8 me_id, pipe_id, queue_id;
7561 u32 ring_index;
7562 bool queue_hotplug = false;
7563 bool queue_dp = false;
7564 bool queue_reset = false;
7565 u32 addr, status, mc_client;
7566 bool queue_thermal = false;
7567
7568 if (!rdev->ih.enabled || rdev->shutdown)
7569 return IRQ_NONE;
7570
7571 wptr = cik_get_ih_wptr(rdev);
7572
7573restart_ih:
7574 /* is somebody else already processing irqs? */
7575 if (atomic_xchg(&rdev->ih.lock, 1))
7576 return IRQ_NONE;
7577
7578 rptr = rdev->ih.rptr;
7579 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7580
7581 /* Order reading of wptr vs. reading of IH ring data */
7582 rmb();
7583
7584 /* display interrupts */
7585 cik_irq_ack(rdev);
7586
7587 while (rptr != wptr) {
7588 /* wptr/rptr are in bytes! */
7589 ring_index = rptr / 4;
7590
7591 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7592 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7593 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7594
7595 switch (src_id) {
7596 case 1: /* D1 vblank/vline */
7597 switch (src_data) {
7598 case 0: /* D1 vblank */
7599 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7600 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7601
7602 if (rdev->irq.crtc_vblank_int[0]) {
7603 drm_handle_vblank(rdev->ddev, 0);
7604 rdev->pm.vblank_sync = true;
7605 wake_up(&rdev->irq.vblank_queue);
7606 }
7607 if (atomic_read(&rdev->irq.pflip[0]))
7608 radeon_crtc_handle_vblank(rdev, 0);
7609 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7610 DRM_DEBUG("IH: D1 vblank\n");
7611
7612 break;
7613 case 1: /* D1 vline */
7614 if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7615 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7616
7617 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7618 DRM_DEBUG("IH: D1 vline\n");
7619
7620 break;
7621 default:
7622 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7623 break;
7624 }
7625 break;
7626 case 2: /* D2 vblank/vline */
7627 switch (src_data) {
7628 case 0: /* D2 vblank */
7629 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7630 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7631
7632 if (rdev->irq.crtc_vblank_int[1]) {
7633 drm_handle_vblank(rdev->ddev, 1);
7634 rdev->pm.vblank_sync = true;
7635 wake_up(&rdev->irq.vblank_queue);
7636 }
7637 if (atomic_read(&rdev->irq.pflip[1]))
7638 radeon_crtc_handle_vblank(rdev, 1);
7639 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7640 DRM_DEBUG("IH: D2 vblank\n");
7641
7642 break;
7643 case 1: /* D2 vline */
7644 if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
7645 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7646
7647 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7648 DRM_DEBUG("IH: D2 vline\n");
7649
7650 break;
7651 default:
7652 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7653 break;
7654 }
7655 break;
7656 case 3: /* D3 vblank/vline */
7657 switch (src_data) {
7658 case 0: /* D3 vblank */
7659 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
7660 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7661
7662 if (rdev->irq.crtc_vblank_int[2]) {
7663 drm_handle_vblank(rdev->ddev, 2);
7664 rdev->pm.vblank_sync = true;
7665 wake_up(&rdev->irq.vblank_queue);
7666 }
7667 if (atomic_read(&rdev->irq.pflip[2]))
7668 radeon_crtc_handle_vblank(rdev, 2);
7669 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7670 DRM_DEBUG("IH: D3 vblank\n");
7671
7672 break;
7673 case 1: /* D3 vline */
7674 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
7675 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7676
7677 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7678 DRM_DEBUG("IH: D3 vline\n");
7679
7680 break;
7681 default:
7682 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7683 break;
7684 }
7685 break;
7686 case 4: /* D4 vblank/vline */
7687 switch (src_data) {
7688 case 0: /* D4 vblank */
7689 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
7690 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7691
7692 if (rdev->irq.crtc_vblank_int[3]) {
7693 drm_handle_vblank(rdev->ddev, 3);
7694 rdev->pm.vblank_sync = true;
7695 wake_up(&rdev->irq.vblank_queue);
7696 }
7697 if (atomic_read(&rdev->irq.pflip[3]))
7698 radeon_crtc_handle_vblank(rdev, 3);
7699 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7700 DRM_DEBUG("IH: D4 vblank\n");
7701
7702 break;
7703 case 1: /* D4 vline */
7704 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
7705 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7706
7707 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7708 DRM_DEBUG("IH: D4 vline\n");
7709
7710 break;
7711 default:
7712 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7713 break;
7714 }
7715 break;
7716 case 5: /* D5 vblank/vline */
7717 switch (src_data) {
7718 case 0: /* D5 vblank */
7719 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
7720 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7721
7722 if (rdev->irq.crtc_vblank_int[4]) {
7723 drm_handle_vblank(rdev->ddev, 4);
7724 rdev->pm.vblank_sync = true;
7725 wake_up(&rdev->irq.vblank_queue);
7726 }
7727 if (atomic_read(&rdev->irq.pflip[4]))
7728 radeon_crtc_handle_vblank(rdev, 4);
7729 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7730 DRM_DEBUG("IH: D5 vblank\n");
7731
7732 break;
7733 case 1: /* D5 vline */
7734 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
7735 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7736
7737 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7738 DRM_DEBUG("IH: D5 vline\n");
7739
7740 break;
7741 default:
7742 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7743 break;
7744 }
7745 break;
7746 case 6: /* D6 vblank/vline */
7747 switch (src_data) {
7748 case 0: /* D6 vblank */
7749 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
7750 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7751
7752 if (rdev->irq.crtc_vblank_int[5]) {
7753 drm_handle_vblank(rdev->ddev, 5);
7754 rdev->pm.vblank_sync = true;
7755 wake_up(&rdev->irq.vblank_queue);
7756 }
7757 if (atomic_read(&rdev->irq.pflip[5]))
7758 radeon_crtc_handle_vblank(rdev, 5);
7759 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7760 DRM_DEBUG("IH: D6 vblank\n");
7761
7762 break;
7763 case 1: /* D6 vline */
7764 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
7765 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7766
7767 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7768 DRM_DEBUG("IH: D6 vline\n");
7769
7770 break;
7771 default:
7772 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7773 break;
7774 }
7775 break;
7776 case 8: /* D1 page flip */
7777 case 10: /* D2 page flip */
7778 case 12: /* D3 page flip */
7779 case 14: /* D4 page flip */
7780 case 16: /* D5 page flip */
7781 case 18: /* D6 page flip */
7782 DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7783 if (radeon_use_pflipirq > 0)
7784 radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7785 break;
7786 case 42: /* HPD hotplug */
7787 switch (src_data) {
7788 case 0:
7789 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
7790 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7791
7792 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7793 queue_hotplug = true;
7794 DRM_DEBUG("IH: HPD1\n");
7795
7796 break;
7797 case 1:
7798 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
7799 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7800
7801 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7802 queue_hotplug = true;
7803 DRM_DEBUG("IH: HPD2\n");
7804
7805 break;
7806 case 2:
7807 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
7808 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7809
7810 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7811 queue_hotplug = true;
7812 DRM_DEBUG("IH: HPD3\n");
7813
7814 break;
7815 case 3:
7816 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
7817 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7818
7819 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7820 queue_hotplug = true;
7821 DRM_DEBUG("IH: HPD4\n");
7822
7823 break;
7824 case 4:
7825 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
7826 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7827
7828 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7829 queue_hotplug = true;
7830 DRM_DEBUG("IH: HPD5\n");
7831
7832 break;
7833 case 5:
7834 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
7835 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7836
7837 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7838 queue_hotplug = true;
7839 DRM_DEBUG("IH: HPD6\n");
7840
7841 break;
7842 case 6:
7843 if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
7844 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7845
7846 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
7847 queue_dp = true;
7848 DRM_DEBUG("IH: HPD_RX 1\n");
7849
7850 break;
7851 case 7:
7852 if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
7853 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7854
7855 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
7856 queue_dp = true;
7857 DRM_DEBUG("IH: HPD_RX 2\n");
7858
7859 break;
7860 case 8:
7861 if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
7862 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7863
7864 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
7865 queue_dp = true;
7866 DRM_DEBUG("IH: HPD_RX 3\n");
7867
7868 break;
7869 case 9:
7870 if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
7871 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7872
7873 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
7874 queue_dp = true;
7875 DRM_DEBUG("IH: HPD_RX 4\n");
7876
7877 break;
7878 case 10:
7879 if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
7880 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7881
7882 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
7883 queue_dp = true;
7884 DRM_DEBUG("IH: HPD_RX 5\n");
7885
7886 break;
7887 case 11:
7888 if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
7889 DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7890
7891 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
7892 queue_dp = true;
7893 DRM_DEBUG("IH: HPD_RX 6\n");
7894
7895 break;
7896 default:
7897 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7898 break;
7899 }
7900 break;
7901 case 96:
7902 DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
7903 WREG32(SRBM_INT_ACK, 0x1);
7904 break;
7905 case 124: /* UVD */
7906 DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7907 radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7908 break;
7909 case 146:
7910 case 147:
7911 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7912 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7913 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7914 /* reset addr and status */
7915 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7916 if (addr == 0x0 && status == 0x0)
7917 break;
7918 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7919 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7920 addr);
7921 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7922 status);
7923 cik_vm_decode_fault(rdev, status, addr, mc_client);
7924 break;
7925 case 167: /* VCE */
7926 DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7927 switch (src_data) {
7928 case 0:
7929 radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7930 break;
7931 case 1:
7932 radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7933 break;
7934 default:
7935 DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7936 break;
7937 }
7938 break;
7939 case 176: /* GFX RB CP_INT */
7940 case 177: /* GFX IB CP_INT */
7941 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7942 break;
7943 case 181: /* CP EOP event */
7944 DRM_DEBUG("IH: CP EOP\n");
7945 /* XXX check the bitfield order! */
7946 me_id = (ring_id & 0x60) >> 5;
7947 pipe_id = (ring_id & 0x18) >> 3;
7948 queue_id = (ring_id & 0x7) >> 0;
7949 switch (me_id) {
7950 case 0:
7951 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7952 break;
7953 case 1:
7954 case 2:
7955 if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7956 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7957 if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7958 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7959 break;
7960 }
7961 break;
7962 case 184: /* CP Privileged reg access */
7963 DRM_ERROR("Illegal register access in command stream\n");
7964 /* XXX check the bitfield order! */
7965 me_id = (ring_id & 0x60) >> 5;
7966 pipe_id = (ring_id & 0x18) >> 3;
7967 queue_id = (ring_id & 0x7) >> 0;
7968 switch (me_id) {
7969 case 0:
7970 /* This results in a full GPU reset, but all we need to do is soft
7971 * reset the CP for gfx
7972 */
7973 queue_reset = true;
7974 break;
7975 case 1:
7976 /* XXX compute */
7977 queue_reset = true;
7978 break;
7979 case 2:
7980 /* XXX compute */
7981 queue_reset = true;
7982 break;
7983 }
7984 break;
7985 case 185: /* CP Privileged inst */
7986 DRM_ERROR("Illegal instruction in command stream\n");
7987 /* XXX check the bitfield order! */
7988 me_id = (ring_id & 0x60) >> 5;
7989 pipe_id = (ring_id & 0x18) >> 3;
7990 queue_id = (ring_id & 0x7) >> 0;
7991 switch (me_id) {
7992 case 0:
7993 /* This results in a full GPU reset, but all we need to do is soft
7994 * reset the CP for gfx
7995 */
7996 queue_reset = true;
7997 break;
7998 case 1:
7999 /* XXX compute */
8000 queue_reset = true;
8001 break;
8002 case 2:
8003 /* XXX compute */
8004 queue_reset = true;
8005 break;
8006 }
8007 break;
8008 case 224: /* SDMA trap event */
8009 /* XXX check the bitfield order! */
8010 me_id = (ring_id & 0x3) >> 0;
8011 queue_id = (ring_id & 0xc) >> 2;
8012 DRM_DEBUG("IH: SDMA trap\n");
8013 switch (me_id) {
8014 case 0:
8015 switch (queue_id) {
8016 case 0:
8017 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8018 break;
8019 case 1:
8020 /* XXX compute */
8021 break;
8022 case 2:
8023 /* XXX compute */
8024 break;
8025 }
8026 break;
8027 case 1:
8028 switch (queue_id) {
8029 case 0:
8030 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8031 break;
8032 case 1:
8033 /* XXX compute */
8034 break;
8035 case 2:
8036 /* XXX compute */
8037 break;
8038 }
8039 break;
8040 }
8041 break;
8042 case 230: /* thermal low to high */
8043 DRM_DEBUG("IH: thermal low to high\n");
8044 rdev->pm.dpm.thermal.high_to_low = false;
8045 queue_thermal = true;
8046 break;
8047 case 231: /* thermal high to low */
8048 DRM_DEBUG("IH: thermal high to low\n");
8049 rdev->pm.dpm.thermal.high_to_low = true;
8050 queue_thermal = true;
8051 break;
8052 case 233: /* GUI IDLE */
8053 DRM_DEBUG("IH: GUI idle\n");
8054 break;
8055 case 241: /* SDMA Privileged inst */
8056 case 247: /* SDMA Privileged inst */
8057 DRM_ERROR("Illegal instruction in SDMA command stream\n");
8058 /* XXX check the bitfield order! */
8059 me_id = (ring_id & 0x3) >> 0;
8060 queue_id = (ring_id & 0xc) >> 2;
8061 switch (me_id) {
8062 case 0:
8063 switch (queue_id) {
8064 case 0:
8065 queue_reset = true;
8066 break;
8067 case 1:
8068 /* XXX compute */
8069 queue_reset = true;
8070 break;
8071 case 2:
8072 /* XXX compute */
8073 queue_reset = true;
8074 break;
8075 }
8076 break;
8077 case 1:
8078 switch (queue_id) {
8079 case 0:
8080 queue_reset = true;
8081 break;
8082 case 1:
8083 /* XXX compute */
8084 queue_reset = true;
8085 break;
8086 case 2:
8087 /* XXX compute */
8088 queue_reset = true;
8089 break;
8090 }
8091 break;
8092 }
8093 break;
8094 default:
8095 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8096 break;
8097 }
8098
8099 /* wptr/rptr are in bytes! */
8100 rptr += 16;
8101 rptr &= rdev->ih.ptr_mask;
8102 WREG32(IH_RB_RPTR, rptr);
8103 }
8104 if (queue_dp)
8105 schedule_work(&rdev->dp_work);
8106 if (queue_hotplug)
8107 schedule_delayed_work(&rdev->hotplug_work, 0);
8108 if (queue_reset) {
8109 rdev->needs_reset = true;
8110 wake_up_all(&rdev->fence_queue);
8111 }
8112 if (queue_thermal)
8113 schedule_work(&rdev->pm.dpm.thermal.work);
8114 rdev->ih.rptr = rptr;
8115 atomic_set(&rdev->ih.lock, 0);
8116
8117 /* make sure wptr hasn't changed while processing */
8118 wptr = cik_get_ih_wptr(rdev);
8119 if (wptr != rptr)
8120 goto restart_ih;
8121
8122 return IRQ_HANDLED;
8123}
8124
8125/*
8126 * startup/shutdown callbacks
8127 */
8128static void cik_uvd_init(struct radeon_device *rdev)
8129{
8130 int r;
8131
8132 if (!rdev->has_uvd)
8133 return;
8134
8135 r = radeon_uvd_init(rdev);
8136 if (r) {
8137 dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
8138 /*
8139 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
8140 * to early fails cik_uvd_start() and thus nothing happens
8141 * there. So it is pointless to try to go through that code
8142 * hence why we disable uvd here.
8143 */
8144 rdev->has_uvd = 0;
8145 return;
8146 }
8147 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
8148 r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
8149}
8150
8151static void cik_uvd_start(struct radeon_device *rdev)
8152{
8153 int r;
8154
8155 if (!rdev->has_uvd)
8156 return;
8157
8158 r = radeon_uvd_resume(rdev);
8159 if (r) {
8160 dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
8161 goto error;
8162 }
8163 r = uvd_v4_2_resume(rdev);
8164 if (r) {
8165 dev_err(rdev->dev, "failed UVD 4.2 resume (%d).\n", r);
8166 goto error;
8167 }
8168 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
8169 if (r) {
8170 dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
8171 goto error;
8172 }
8173 return;
8174
8175error:
8176 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8177}
8178
8179static void cik_uvd_resume(struct radeon_device *rdev)
8180{
8181 struct radeon_ring *ring;
8182 int r;
8183
8184 if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
8185 return;
8186
8187 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8188 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
8189 if (r) {
8190 dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
8191 return;
8192 }
8193 r = uvd_v1_0_init(rdev);
8194 if (r) {
8195 dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
8196 return;
8197 }
8198}
8199
8200static void cik_vce_init(struct radeon_device *rdev)
8201{
8202 int r;
8203
8204 if (!rdev->has_vce)
8205 return;
8206
8207 r = radeon_vce_init(rdev);
8208 if (r) {
8209 dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
8210 /*
8211 * At this point rdev->vce.vcpu_bo is NULL which trickles down
8212 * to early fails cik_vce_start() and thus nothing happens
8213 * there. So it is pointless to try to go through that code
8214 * hence why we disable vce here.
8215 */
8216 rdev->has_vce = 0;
8217 return;
8218 }
8219 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
8220 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
8221 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
8222 r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
8223}
8224
8225static void cik_vce_start(struct radeon_device *rdev)
8226{
8227 int r;
8228
8229 if (!rdev->has_vce)
8230 return;
8231
8232 r = radeon_vce_resume(rdev);
8233 if (r) {
8234 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8235 goto error;
8236 }
8237 r = vce_v2_0_resume(rdev);
8238 if (r) {
8239 dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
8240 goto error;
8241 }
8242 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
8243 if (r) {
8244 dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
8245 goto error;
8246 }
8247 r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
8248 if (r) {
8249 dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
8250 goto error;
8251 }
8252 return;
8253
8254error:
8255 rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8256 rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8257}
8258
8259static void cik_vce_resume(struct radeon_device *rdev)
8260{
8261 struct radeon_ring *ring;
8262 int r;
8263
8264 if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
8265 return;
8266
8267 ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8268 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8269 if (r) {
8270 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8271 return;
8272 }
8273 ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8274 r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
8275 if (r) {
8276 dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
8277 return;
8278 }
8279 r = vce_v1_0_init(rdev);
8280 if (r) {
8281 dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
8282 return;
8283 }
8284}
8285
8286/**
8287 * cik_startup - program the asic to a functional state
8288 *
8289 * @rdev: radeon_device pointer
8290 *
8291 * Programs the asic to a functional state (CIK).
8292 * Called by cik_init() and cik_resume().
8293 * Returns 0 for success, error for failure.
8294 */
8295static int cik_startup(struct radeon_device *rdev)
8296{
8297 struct radeon_ring *ring;
8298 u32 nop;
8299 int r;
8300
8301 /* enable pcie gen2/3 link */
8302 cik_pcie_gen3_enable(rdev);
8303 /* enable aspm */
8304 cik_program_aspm(rdev);
8305
8306 /* scratch needs to be initialized before MC */
8307 r = r600_vram_scratch_init(rdev);
8308 if (r)
8309 return r;
8310
8311 cik_mc_program(rdev);
8312
8313 if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8314 r = ci_mc_load_microcode(rdev);
8315 if (r) {
8316 DRM_ERROR("Failed to load MC firmware!\n");
8317 return r;
8318 }
8319 }
8320
8321 r = cik_pcie_gart_enable(rdev);
8322 if (r)
8323 return r;
8324 cik_gpu_init(rdev);
8325
8326 /* allocate rlc buffers */
8327 if (rdev->flags & RADEON_IS_IGP) {
8328 if (rdev->family == CHIP_KAVERI) {
8329 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8330 rdev->rlc.reg_list_size =
8331 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8332 } else {
8333 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8334 rdev->rlc.reg_list_size =
8335 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8336 }
8337 }
8338 rdev->rlc.cs_data = ci_cs_data;
8339 rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
8340 rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
8341 r = sumo_rlc_init(rdev);
8342 if (r) {
8343 DRM_ERROR("Failed to init rlc BOs!\n");
8344 return r;
8345 }
8346
8347 /* allocate wb buffer */
8348 r = radeon_wb_init(rdev);
8349 if (r)
8350 return r;
8351
8352 /* allocate mec buffers */
8353 r = cik_mec_init(rdev);
8354 if (r) {
8355 DRM_ERROR("Failed to init MEC BOs!\n");
8356 return r;
8357 }
8358
8359 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8360 if (r) {
8361 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8362 return r;
8363 }
8364
8365 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8366 if (r) {
8367 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8368 return r;
8369 }
8370
8371 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8372 if (r) {
8373 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8374 return r;
8375 }
8376
8377 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8378 if (r) {
8379 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8380 return r;
8381 }
8382
8383 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8384 if (r) {
8385 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8386 return r;
8387 }
8388
8389 cik_uvd_start(rdev);
8390 cik_vce_start(rdev);
8391
8392 /* Enable IRQ */
8393 if (!rdev->irq.installed) {
8394 r = radeon_irq_kms_init(rdev);
8395 if (r)
8396 return r;
8397 }
8398
8399 r = cik_irq_init(rdev);
8400 if (r) {
8401 DRM_ERROR("radeon: IH init failed (%d).\n", r);
8402 radeon_irq_kms_fini(rdev);
8403 return r;
8404 }
8405 cik_irq_set(rdev);
8406
8407 if (rdev->family == CHIP_HAWAII) {
8408 if (rdev->new_fw)
8409 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8410 else
8411 nop = RADEON_CP_PACKET2;
8412 } else {
8413 nop = PACKET3(PACKET3_NOP, 0x3FFF);
8414 }
8415
8416 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8417 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8418 nop);
8419 if (r)
8420 return r;
8421
8422 /* set up the compute queues */
8423 /* type-2 packets are deprecated on MEC, use type-3 instead */
8424 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8425 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8426 nop);
8427 if (r)
8428 return r;
8429 ring->me = 1; /* first MEC */
8430 ring->pipe = 0; /* first pipe */
8431 ring->queue = 0; /* first queue */
8432 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8433
8434 /* type-2 packets are deprecated on MEC, use type-3 instead */
8435 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8436 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8437 nop);
8438 if (r)
8439 return r;
8440 /* dGPU only have 1 MEC */
8441 ring->me = 1; /* first MEC */
8442 ring->pipe = 0; /* first pipe */
8443 ring->queue = 1; /* second queue */
8444 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8445
8446 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8447 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8448 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8449 if (r)
8450 return r;
8451
8452 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8453 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8454 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8455 if (r)
8456 return r;
8457
8458 r = cik_cp_resume(rdev);
8459 if (r)
8460 return r;
8461
8462 r = cik_sdma_resume(rdev);
8463 if (r)
8464 return r;
8465
8466 cik_uvd_resume(rdev);
8467 cik_vce_resume(rdev);
8468
8469 r = radeon_ib_pool_init(rdev);
8470 if (r) {
8471 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8472 return r;
8473 }
8474
8475 r = radeon_vm_manager_init(rdev);
8476 if (r) {
8477 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8478 return r;
8479 }
8480
8481 r = radeon_audio_init(rdev);
8482 if (r)
8483 return r;
8484
8485 return 0;
8486}
8487
8488/**
8489 * cik_resume - resume the asic to a functional state
8490 *
8491 * @rdev: radeon_device pointer
8492 *
8493 * Programs the asic to a functional state (CIK).
8494 * Called at resume.
8495 * Returns 0 for success, error for failure.
8496 */
8497int cik_resume(struct radeon_device *rdev)
8498{
8499 int r;
8500
8501 /* post card */
8502 atom_asic_init(rdev->mode_info.atom_context);
8503
8504 /* init golden registers */
8505 cik_init_golden_registers(rdev);
8506
8507 if (rdev->pm.pm_method == PM_METHOD_DPM)
8508 radeon_pm_resume(rdev);
8509
8510 rdev->accel_working = true;
8511 r = cik_startup(rdev);
8512 if (r) {
8513 DRM_ERROR("cik startup failed on resume\n");
8514 rdev->accel_working = false;
8515 return r;
8516 }
8517
8518 return r;
8519
8520}
8521
8522/**
8523 * cik_suspend - suspend the asic
8524 *
8525 * @rdev: radeon_device pointer
8526 *
8527 * Bring the chip into a state suitable for suspend (CIK).
8528 * Called at suspend.
8529 * Returns 0 for success.
8530 */
8531int cik_suspend(struct radeon_device *rdev)
8532{
8533 radeon_pm_suspend(rdev);
8534 radeon_audio_fini(rdev);
8535 radeon_vm_manager_fini(rdev);
8536 cik_cp_enable(rdev, false);
8537 cik_sdma_enable(rdev, false);
8538 if (rdev->has_uvd) {
8539 uvd_v1_0_fini(rdev);
8540 radeon_uvd_suspend(rdev);
8541 }
8542 if (rdev->has_vce)
8543 radeon_vce_suspend(rdev);
8544 cik_fini_pg(rdev);
8545 cik_fini_cg(rdev);
8546 cik_irq_suspend(rdev);
8547 radeon_wb_disable(rdev);
8548 cik_pcie_gart_disable(rdev);
8549 return 0;
8550}
8551
/* The plan is to move initialization into this function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic specific function. This should also make it
 * possible to remove a number of callback functions, such as
 * vram_info.
 */
8558/**
8559 * cik_init - asic specific driver and hw init
8560 *
8561 * @rdev: radeon_device pointer
8562 *
8563 * Setup asic specific driver variables and program the hw
8564 * to a functional state (CIK).
8565 * Called at driver startup.
8566 * Returns 0 for success, errors for failure.
8567 */
8568int cik_init(struct radeon_device *rdev)
8569{
8570 struct radeon_ring *ring;
8571 int r;
8572
8573 /* Read BIOS */
8574 if (!radeon_get_bios(rdev)) {
8575 if (ASIC_IS_AVIVO(rdev))
8576 return -EINVAL;
8577 }
8578 /* Must be an ATOMBIOS */
8579 if (!rdev->is_atom_bios) {
8580 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8581 return -EINVAL;
8582 }
8583 r = radeon_atombios_init(rdev);
8584 if (r)
8585 return r;
8586
8587 /* Post card if necessary */
8588 if (!radeon_card_posted(rdev)) {
8589 if (!rdev->bios) {
8590 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8591 return -EINVAL;
8592 }
8593 DRM_INFO("GPU not posted. posting now...\n");
8594 atom_asic_init(rdev->mode_info.atom_context);
8595 }
8596 /* init golden registers */
8597 cik_init_golden_registers(rdev);
8598 /* Initialize scratch registers */
8599 cik_scratch_init(rdev);
8600 /* Initialize surface registers */
8601 radeon_surface_init(rdev);
8602 /* Initialize clocks */
8603 radeon_get_clock_info(rdev->ddev);
8604
8605 /* Fence driver */
8606 r = radeon_fence_driver_init(rdev);
8607 if (r)
8608 return r;
8609
8610 /* initialize memory controller */
8611 r = cik_mc_init(rdev);
8612 if (r)
8613 return r;
8614 /* Memory manager */
8615 r = radeon_bo_init(rdev);
8616 if (r)
8617 return r;
8618
8619 if (rdev->flags & RADEON_IS_IGP) {
8620 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8621 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8622 r = cik_init_microcode(rdev);
8623 if (r) {
8624 DRM_ERROR("Failed to load firmware!\n");
8625 return r;
8626 }
8627 }
8628 } else {
8629 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8630 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8631 !rdev->mc_fw) {
8632 r = cik_init_microcode(rdev);
8633 if (r) {
8634 DRM_ERROR("Failed to load firmware!\n");
8635 return r;
8636 }
8637 }
8638 }
8639
8640 /* Initialize power management */
8641 radeon_pm_init(rdev);
8642
8643 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8644 ring->ring_obj = NULL;
8645 r600_ring_init(rdev, ring, 1024 * 1024);
8646
8647 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8648 ring->ring_obj = NULL;
8649 r600_ring_init(rdev, ring, 1024 * 1024);
8650 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8651 if (r)
8652 return r;
8653
8654 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8655 ring->ring_obj = NULL;
8656 r600_ring_init(rdev, ring, 1024 * 1024);
8657 r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8658 if (r)
8659 return r;
8660
8661 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8662 ring->ring_obj = NULL;
8663 r600_ring_init(rdev, ring, 256 * 1024);
8664
8665 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8666 ring->ring_obj = NULL;
8667 r600_ring_init(rdev, ring, 256 * 1024);
8668
8669 cik_uvd_init(rdev);
8670 cik_vce_init(rdev);
8671
8672 rdev->ih.ring_obj = NULL;
8673 r600_ih_ring_init(rdev, 64 * 1024);
8674
8675 r = r600_pcie_gart_init(rdev);
8676 if (r)
8677 return r;
8678
8679 rdev->accel_working = true;
8680 r = cik_startup(rdev);
8681 if (r) {
8682 dev_err(rdev->dev, "disabling GPU acceleration\n");
8683 cik_cp_fini(rdev);
8684 cik_sdma_fini(rdev);
8685 cik_irq_fini(rdev);
8686 sumo_rlc_fini(rdev);
8687 cik_mec_fini(rdev);
8688 radeon_wb_fini(rdev);
8689 radeon_ib_pool_fini(rdev);
8690 radeon_vm_manager_fini(rdev);
8691 radeon_irq_kms_fini(rdev);
8692 cik_pcie_gart_fini(rdev);
8693 rdev->accel_working = false;
8694 }
8695
8696 /* Don't start up if the MC ucode is missing.
8697 * The default clocks and voltages before the MC ucode
8698 * is loaded are not suffient for advanced operations.
8699 */
8700 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8701 DRM_ERROR("radeon: MC ucode required for NI+.\n");
8702 return -EINVAL;
8703 }
8704
8705 return 0;
8706}
8707
8708/**
8709 * cik_fini - asic specific driver and hw fini
8710 *
8711 * @rdev: radeon_device pointer
8712 *
8713 * Tear down the asic specific driver variables and program the hw
8714 * to an idle state (CIK).
8715 * Called at driver unload.
8716 */
8717void cik_fini(struct radeon_device *rdev)
8718{
8719 radeon_pm_fini(rdev);
8720 cik_cp_fini(rdev);
8721 cik_sdma_fini(rdev);
8722 cik_fini_pg(rdev);
8723 cik_fini_cg(rdev);
8724 cik_irq_fini(rdev);
8725 sumo_rlc_fini(rdev);
8726 cik_mec_fini(rdev);
8727 radeon_wb_fini(rdev);
8728 radeon_vm_manager_fini(rdev);
8729 radeon_ib_pool_fini(rdev);
8730 radeon_irq_kms_fini(rdev);
8731 uvd_v1_0_fini(rdev);
8732 radeon_uvd_fini(rdev);
8733 radeon_vce_fini(rdev);
8734 cik_pcie_gart_fini(rdev);
8735 r600_vram_scratch_fini(rdev);
8736 radeon_gem_fini(rdev);
8737 radeon_fence_driver_fini(rdev);
8738 radeon_bo_fini(rdev);
8739 radeon_atombios_fini(rdev);
8740 kfree(rdev->bios);
8741 rdev->bios = NULL;
8742}
8743
8744void dce8_program_fmt(struct drm_encoder *encoder)
8745{
8746 struct drm_device *dev = encoder->dev;
8747 struct radeon_device *rdev = dev->dev_private;
8748 struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8749 struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8750 struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8751 int bpc = 0;
8752 u32 tmp = 0;
8753 enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8754
8755 if (connector) {
8756 struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8757 bpc = radeon_get_monitor_bpc(connector);
8758 dither = radeon_connector->dither;
8759 }
8760
8761 /* LVDS/eDP FMT is set up by atom */
8762 if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8763 return;
8764
8765 /* not needed for analog */
8766 if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8767 (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8768 return;
8769
8770 if (bpc == 0)
8771 return;
8772
8773 switch (bpc) {
8774 case 6:
8775 if (dither == RADEON_FMT_DITHER_ENABLE)
8776 /* XXX sort out optimal dither settings */
8777 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8778 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8779 else
8780 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8781 break;
8782 case 8:
8783 if (dither == RADEON_FMT_DITHER_ENABLE)
8784 /* XXX sort out optimal dither settings */
8785 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8786 FMT_RGB_RANDOM_ENABLE |
8787 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8788 else
8789 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8790 break;
8791 case 10:
8792 if (dither == RADEON_FMT_DITHER_ENABLE)
8793 /* XXX sort out optimal dither settings */
8794 tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8795 FMT_RGB_RANDOM_ENABLE |
8796 FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8797 else
8798 tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8799 break;
8800 default:
8801 /* not needed */
8802 break;
8803 }
8804
8805 WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8806}
8807
8808/* display watermark setup */
8809/**
8810 * dce8_line_buffer_adjust - Set up the line buffer
8811 *
8812 * @rdev: radeon_device pointer
8813 * @radeon_crtc: the selected display controller
8814 * @mode: the current display mode on the selected display
8815 * controller
8816 *
8817 * Setup up the line buffer allocation for
8818 * the selected display controller (CIK).
8819 * Returns the line buffer size in pixels.
8820 */
8821static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8822 struct radeon_crtc *radeon_crtc,
8823 struct drm_display_mode *mode)
8824{
8825 u32 tmp, buffer_alloc, i;
8826 u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8827 /*
8828 * Line Buffer Setup
8829 * There are 6 line buffers, one for each display controllers.
8830 * There are 3 partitions per LB. Select the number of partitions
8831 * to enable based on the display width. For display widths larger
8832 * than 4096, you need use to use 2 display controllers and combine
8833 * them using the stereo blender.
8834 */
8835 if (radeon_crtc->base.enabled && mode) {
8836 if (mode->crtc_hdisplay < 1920) {
8837 tmp = 1;
8838 buffer_alloc = 2;
8839 } else if (mode->crtc_hdisplay < 2560) {
8840 tmp = 2;
8841 buffer_alloc = 2;
8842 } else if (mode->crtc_hdisplay < 4096) {
8843 tmp = 0;
8844 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8845 } else {
8846 DRM_DEBUG_KMS("Mode too big for LB!\n");
8847 tmp = 0;
8848 buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8849 }
8850 } else {
8851 tmp = 1;
8852 buffer_alloc = 0;
8853 }
8854
8855 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8856 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8857
8858 WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8859 DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8860 for (i = 0; i < rdev->usec_timeout; i++) {
8861 if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8862 DMIF_BUFFERS_ALLOCATED_COMPLETED)
8863 break;
8864 udelay(1);
8865 }
8866
8867 if (radeon_crtc->base.enabled && mode) {
8868 switch (tmp) {
8869 case 0:
8870 default:
8871 return 4096 * 2;
8872 case 1:
8873 return 1920 * 2;
8874 case 2:
8875 return 2560 * 2;
8876 }
8877 }
8878
8879 /* controller not enabled, so no lb used */
8880 return 0;
8881}
8882
8883/**
8884 * cik_get_number_of_dram_channels - get the number of dram channels
8885 *
8886 * @rdev: radeon_device pointer
8887 *
8888 * Look up the number of video ram channels (CIK).
8889 * Used for display watermark bandwidth calculations
8890 * Returns the number of dram channels
8891 */
8892static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8893{
8894 u32 tmp = RREG32(MC_SHARED_CHMAP);
8895
8896 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8897 case 0:
8898 default:
8899 return 1;
8900 case 1:
8901 return 2;
8902 case 2:
8903 return 4;
8904 case 3:
8905 return 8;
8906 case 4:
8907 return 3;
8908 case 5:
8909 return 6;
8910 case 6:
8911 return 10;
8912 case 7:
8913 return 12;
8914 case 8:
8915 return 16;
8916 }
8917}
8918
8919struct dce8_wm_params {
8920 u32 dram_channels; /* number of dram channels */
8921 u32 yclk; /* bandwidth per dram data pin in kHz */
8922 u32 sclk; /* engine clock in kHz */
8923 u32 disp_clk; /* display clock in kHz */
8924 u32 src_width; /* viewport width */
8925 u32 active_time; /* active display time in ns */
8926 u32 blank_time; /* blank time in ns */
8927 bool interlaced; /* mode is interlaced */
8928 fixed20_12 vsc; /* vertical scale ratio */
8929 u32 num_heads; /* number of active crtcs */
8930 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8931 u32 lb_size; /* line buffer allocated to pipe */
8932 u32 vtaps; /* vertical scaler taps */
8933};
8934
8935/**
8936 * dce8_dram_bandwidth - get the dram bandwidth
8937 *
8938 * @wm: watermark calculation data
8939 *
8940 * Calculate the raw dram bandwidth (CIK).
8941 * Used for display watermark bandwidth calculations
8942 * Returns the dram bandwidth in MBytes/s
8943 */
8944static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8945{
8946 /* Calculate raw DRAM Bandwidth */
8947 fixed20_12 dram_efficiency; /* 0.7 */
8948 fixed20_12 yclk, dram_channels, bandwidth;
8949 fixed20_12 a;
8950
8951 a.full = dfixed_const(1000);
8952 yclk.full = dfixed_const(wm->yclk);
8953 yclk.full = dfixed_div(yclk, a);
8954 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8955 a.full = dfixed_const(10);
8956 dram_efficiency.full = dfixed_const(7);
8957 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8958 bandwidth.full = dfixed_mul(dram_channels, yclk);
8959 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8960
8961 return dfixed_trunc(bandwidth);
8962}
8963
8964/**
8965 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8966 *
8967 * @wm: watermark calculation data
8968 *
8969 * Calculate the dram bandwidth used for display (CIK).
8970 * Used for display watermark bandwidth calculations
8971 * Returns the dram bandwidth for display in MBytes/s
8972 */
8973static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8974{
8975 /* Calculate DRAM Bandwidth and the part allocated to display. */
8976 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8977 fixed20_12 yclk, dram_channels, bandwidth;
8978 fixed20_12 a;
8979
8980 a.full = dfixed_const(1000);
8981 yclk.full = dfixed_const(wm->yclk);
8982 yclk.full = dfixed_div(yclk, a);
8983 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8984 a.full = dfixed_const(10);
8985 disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8986 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8987 bandwidth.full = dfixed_mul(dram_channels, yclk);
8988 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8989
8990 return dfixed_trunc(bandwidth);
8991}
8992
8993/**
8994 * dce8_data_return_bandwidth - get the data return bandwidth
8995 *
8996 * @wm: watermark calculation data
8997 *
8998 * Calculate the data return bandwidth used for display (CIK).
8999 * Used for display watermark bandwidth calculations
9000 * Returns the data return bandwidth in MBytes/s
9001 */
9002static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9003{
9004 /* Calculate the display Data return Bandwidth */
9005 fixed20_12 return_efficiency; /* 0.8 */
9006 fixed20_12 sclk, bandwidth;
9007 fixed20_12 a;
9008
9009 a.full = dfixed_const(1000);
9010 sclk.full = dfixed_const(wm->sclk);
9011 sclk.full = dfixed_div(sclk, a);
9012 a.full = dfixed_const(10);
9013 return_efficiency.full = dfixed_const(8);
9014 return_efficiency.full = dfixed_div(return_efficiency, a);
9015 a.full = dfixed_const(32);
9016 bandwidth.full = dfixed_mul(a, sclk);
9017 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9018
9019 return dfixed_trunc(bandwidth);
9020}
9021
9022/**
9023 * dce8_dmif_request_bandwidth - get the dmif bandwidth
9024 *
9025 * @wm: watermark calculation data
9026 *
9027 * Calculate the dmif bandwidth used for display (CIK).
9028 * Used for display watermark bandwidth calculations
9029 * Returns the dmif bandwidth in MBytes/s
9030 */
9031static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9032{
9033 /* Calculate the DMIF Request Bandwidth */
9034 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9035 fixed20_12 disp_clk, bandwidth;
9036 fixed20_12 a, b;
9037
9038 a.full = dfixed_const(1000);
9039 disp_clk.full = dfixed_const(wm->disp_clk);
9040 disp_clk.full = dfixed_div(disp_clk, a);
9041 a.full = dfixed_const(32);
9042 b.full = dfixed_mul(a, disp_clk);
9043
9044 a.full = dfixed_const(10);
9045 disp_clk_request_efficiency.full = dfixed_const(8);
9046 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9047
9048 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9049
9050 return dfixed_trunc(bandwidth);
9051}
9052
9053/**
9054 * dce8_available_bandwidth - get the min available bandwidth
9055 *
9056 * @wm: watermark calculation data
9057 *
9058 * Calculate the min available bandwidth used for display (CIK).
9059 * Used for display watermark bandwidth calculations
9060 * Returns the min available bandwidth in MBytes/s
9061 */
9062static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9063{
9064 /* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9065 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9066 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9067 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9068
9069 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9070}
9071
9072/**
9073 * dce8_average_bandwidth - get the average available bandwidth
9074 *
9075 * @wm: watermark calculation data
9076 *
9077 * Calculate the average available bandwidth used for display (CIK).
9078 * Used for display watermark bandwidth calculations
9079 * Returns the average available bandwidth in MBytes/s
9080 */
9081static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9082{
9083 /* Calculate the display mode Average Bandwidth
9084 * DisplayMode should contain the source and destination dimensions,
9085 * timing, etc.
9086 */
9087 fixed20_12 bpp;
9088 fixed20_12 line_time;
9089 fixed20_12 src_width;
9090 fixed20_12 bandwidth;
9091 fixed20_12 a;
9092
9093 a.full = dfixed_const(1000);
9094 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9095 line_time.full = dfixed_div(line_time, a);
9096 bpp.full = dfixed_const(wm->bytes_per_pixel);
9097 src_width.full = dfixed_const(wm->src_width);
9098 bandwidth.full = dfixed_mul(src_width, bpp);
9099 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9100 bandwidth.full = dfixed_div(bandwidth, line_time);
9101
9102 return dfixed_trunc(bandwidth);
9103}
9104
9105/**
9106 * dce8_latency_watermark - get the latency watermark
9107 *
9108 * @wm: watermark calculation data
9109 *
9110 * Calculate the latency watermark (CIK).
9111 * Used for display watermark bandwidth calculations
9112 * Returns the latency watermark in ns
9113 */
9114static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9115{
9116 /* First calculate the latency in ns */
9117 u32 mc_latency = 2000; /* 2000 ns. */
9118 u32 available_bandwidth = dce8_available_bandwidth(wm);
9119 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9120 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9121 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9122 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9123 (wm->num_heads * cursor_line_pair_return_time);
9124 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9125 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9126 u32 tmp, dmif_size = 12288;
9127 fixed20_12 a, b, c;
9128
9129 if (wm->num_heads == 0)
9130 return 0;
9131
9132 a.full = dfixed_const(2);
9133 b.full = dfixed_const(1);
9134 if ((wm->vsc.full > a.full) ||
9135 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9136 (wm->vtaps >= 5) ||
9137 ((wm->vsc.full >= a.full) && wm->interlaced))
9138 max_src_lines_per_dst_line = 4;
9139 else
9140 max_src_lines_per_dst_line = 2;
9141
9142 a.full = dfixed_const(available_bandwidth);
9143 b.full = dfixed_const(wm->num_heads);
9144 a.full = dfixed_div(a, b);
9145 tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
9146 tmp = min(dfixed_trunc(a), tmp);
9147
9148 lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
9149
9150 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9151 b.full = dfixed_const(1000);
9152 c.full = dfixed_const(lb_fill_bw);
9153 b.full = dfixed_div(c, b);
9154 a.full = dfixed_div(a, b);
9155 line_fill_time = dfixed_trunc(a);
9156
9157 if (line_fill_time < wm->active_time)
9158 return latency;
9159 else
9160 return latency + (line_fill_time - wm->active_time);
9161
9162}
9163
9164/**
9165 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9166 * average and available dram bandwidth
9167 *
9168 * @wm: watermark calculation data
9169 *
9170 * Check if the display average bandwidth fits in the display
9171 * dram bandwidth (CIK).
9172 * Used for display watermark bandwidth calculations
9173 * Returns true if the display fits, false if not.
9174 */
9175static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9176{
9177 if (dce8_average_bandwidth(wm) <=
9178 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9179 return true;
9180 else
9181 return false;
9182}
9183
9184/**
9185 * dce8_average_bandwidth_vs_available_bandwidth - check
9186 * average and available bandwidth
9187 *
9188 * @wm: watermark calculation data
9189 *
9190 * Check if the display average bandwidth fits in the display
9191 * available bandwidth (CIK).
9192 * Used for display watermark bandwidth calculations
9193 * Returns true if the display fits, false if not.
9194 */
9195static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9196{
9197 if (dce8_average_bandwidth(wm) <=
9198 (dce8_available_bandwidth(wm) / wm->num_heads))
9199 return true;
9200 else
9201 return false;
9202}
9203
9204/**
9205 * dce8_check_latency_hiding - check latency hiding
9206 *
9207 * @wm: watermark calculation data
9208 *
9209 * Check latency hiding (CIK).
9210 * Used for display watermark bandwidth calculations
9211 * Returns true if the display fits, false if not.
9212 */
9213static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9214{
9215 u32 lb_partitions = wm->lb_size / wm->src_width;
9216 u32 line_time = wm->active_time + wm->blank_time;
9217 u32 latency_tolerant_lines;
9218 u32 latency_hiding;
9219 fixed20_12 a;
9220
9221 a.full = dfixed_const(1);
9222 if (wm->vsc.full > a.full)
9223 latency_tolerant_lines = 1;
9224 else {
9225 if (lb_partitions <= (wm->vtaps + 1))
9226 latency_tolerant_lines = 1;
9227 else
9228 latency_tolerant_lines = 2;
9229 }
9230
9231 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9232
9233 if (dce8_latency_watermark(wm) <= latency_hiding)
9234 return true;
9235 else
9236 return false;
9237}
9238
9239/**
9240 * dce8_program_watermarks - program display watermarks
9241 *
9242 * @rdev: radeon_device pointer
9243 * @radeon_crtc: the selected display controller
9244 * @lb_size: line buffer size
9245 * @num_heads: number of display controllers in use
9246 *
9247 * Calculate and program the display watermarks for the
9248 * selected display controller (CIK).
9249 */
9250static void dce8_program_watermarks(struct radeon_device *rdev,
9251 struct radeon_crtc *radeon_crtc,
9252 u32 lb_size, u32 num_heads)
9253{
9254 struct drm_display_mode *mode = &radeon_crtc->base.mode;
9255 struct dce8_wm_params wm_low, wm_high;
9256 u32 active_time;
9257 u32 line_time = 0;
9258 u32 latency_watermark_a = 0, latency_watermark_b = 0;
9259 u32 tmp, wm_mask;
9260
9261 if (radeon_crtc->base.enabled && num_heads && mode) {
9262 active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
9263 (u32)mode->clock);
9264 line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
9265 (u32)mode->clock);
9266 line_time = min(line_time, (u32)65535);
9267
9268 /* watermark for high clocks */
9269 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9270 rdev->pm.dpm_enabled) {
9271 wm_high.yclk =
9272 radeon_dpm_get_mclk(rdev, false) * 10;
9273 wm_high.sclk =
9274 radeon_dpm_get_sclk(rdev, false) * 10;
9275 } else {
9276 wm_high.yclk = rdev->pm.current_mclk * 10;
9277 wm_high.sclk = rdev->pm.current_sclk * 10;
9278 }
9279
9280 wm_high.disp_clk = mode->clock;
9281 wm_high.src_width = mode->crtc_hdisplay;
9282 wm_high.active_time = active_time;
9283 wm_high.blank_time = line_time - wm_high.active_time;
9284 wm_high.interlaced = false;
9285 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9286 wm_high.interlaced = true;
9287 wm_high.vsc = radeon_crtc->vsc;
9288 wm_high.vtaps = 1;
9289 if (radeon_crtc->rmx_type != RMX_OFF)
9290 wm_high.vtaps = 2;
9291 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9292 wm_high.lb_size = lb_size;
9293 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9294 wm_high.num_heads = num_heads;
9295
9296 /* set for high clocks */
9297 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9298
9299 /* possibly force display priority to high */
9300 /* should really do this at mode validation time... */
9301 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9302 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9303 !dce8_check_latency_hiding(&wm_high) ||
9304 (rdev->disp_priority == 2)) {
9305 DRM_DEBUG_KMS("force priority to high\n");
9306 }
9307
9308 /* watermark for low clocks */
9309 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9310 rdev->pm.dpm_enabled) {
9311 wm_low.yclk =
9312 radeon_dpm_get_mclk(rdev, true) * 10;
9313 wm_low.sclk =
9314 radeon_dpm_get_sclk(rdev, true) * 10;
9315 } else {
9316 wm_low.yclk = rdev->pm.current_mclk * 10;
9317 wm_low.sclk = rdev->pm.current_sclk * 10;
9318 }
9319
9320 wm_low.disp_clk = mode->clock;
9321 wm_low.src_width = mode->crtc_hdisplay;
9322 wm_low.active_time = active_time;
9323 wm_low.blank_time = line_time - wm_low.active_time;
9324 wm_low.interlaced = false;
9325 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9326 wm_low.interlaced = true;
9327 wm_low.vsc = radeon_crtc->vsc;
9328 wm_low.vtaps = 1;
9329 if (radeon_crtc->rmx_type != RMX_OFF)
9330 wm_low.vtaps = 2;
9331 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9332 wm_low.lb_size = lb_size;
9333 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9334 wm_low.num_heads = num_heads;
9335
9336 /* set for low clocks */
9337 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9338
9339 /* possibly force display priority to high */
9340 /* should really do this at mode validation time... */
9341 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9342 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9343 !dce8_check_latency_hiding(&wm_low) ||
9344 (rdev->disp_priority == 2)) {
9345 DRM_DEBUG_KMS("force priority to high\n");
9346 }
9347
9348 /* Save number of lines the linebuffer leads before the scanout */
9349 radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9350 }
9351
9352 /* select wm A */
9353 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9354 tmp = wm_mask;
9355 tmp &= ~LATENCY_WATERMARK_MASK(3);
9356 tmp |= LATENCY_WATERMARK_MASK(1);
9357 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9358 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9359 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9360 LATENCY_HIGH_WATERMARK(line_time)));
9361 /* select wm B */
9362 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9363 tmp &= ~LATENCY_WATERMARK_MASK(3);
9364 tmp |= LATENCY_WATERMARK_MASK(2);
9365 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9366 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9367 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9368 LATENCY_HIGH_WATERMARK(line_time)));
9369 /* restore original selection */
9370 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9371
9372 /* save values for DPM */
9373 radeon_crtc->line_time = line_time;
9374 radeon_crtc->wm_high = latency_watermark_a;
9375 radeon_crtc->wm_low = latency_watermark_b;
9376}
9377
9378/**
9379 * dce8_bandwidth_update - program display watermarks
9380 *
9381 * @rdev: radeon_device pointer
9382 *
9383 * Calculate and program the display watermarks and line
9384 * buffer allocation (CIK).
9385 */
9386void dce8_bandwidth_update(struct radeon_device *rdev)
9387{
9388 struct drm_display_mode *mode = NULL;
9389 u32 num_heads = 0, lb_size;
9390 int i;
9391
9392 if (!rdev->mode_info.mode_config_initialized)
9393 return;
9394
9395 radeon_update_display_priority(rdev);
9396
9397 for (i = 0; i < rdev->num_crtc; i++) {
9398 if (rdev->mode_info.crtcs[i]->base.enabled)
9399 num_heads++;
9400 }
9401 for (i = 0; i < rdev->num_crtc; i++) {
9402 mode = &rdev->mode_info.crtcs[i]->base.mode;
9403 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9404 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9405 }
9406}
9407
9408/**
9409 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9410 *
9411 * @rdev: radeon_device pointer
9412 *
9413 * Fetches a GPU clock counter snapshot (SI).
9414 * Returns the 64 bit clock counter snapshot.
9415 */
9416uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9417{
9418 uint64_t clock;
9419
9420 mutex_lock(&rdev->gpu_clock_mutex);
9421 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9422 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9423 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9424 mutex_unlock(&rdev->gpu_clock_mutex);
9425 return clock;
9426}
9427
9428static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9429 u32 cntl_reg, u32 status_reg)
9430{
9431 int r, i;
9432 struct atom_clock_dividers dividers;
9433 uint32_t tmp;
9434
9435 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9436 clock, false, ÷rs);
9437 if (r)
9438 return r;
9439
9440 tmp = RREG32_SMC(cntl_reg);
9441 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9442 tmp |= dividers.post_divider;
9443 WREG32_SMC(cntl_reg, tmp);
9444
9445 for (i = 0; i < 100; i++) {
9446 if (RREG32_SMC(status_reg) & DCLK_STATUS)
9447 break;
9448 mdelay(10);
9449 }
9450 if (i == 100)
9451 return -ETIMEDOUT;
9452
9453 return 0;
9454}
9455
9456int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9457{
9458 int r = 0;
9459
9460 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9461 if (r)
9462 return r;
9463
9464 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9465 return r;
9466}
9467
9468int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9469{
9470 int r, i;
9471 struct atom_clock_dividers dividers;
9472 u32 tmp;
9473
9474 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9475 ecclk, false, ÷rs);
9476 if (r)
9477 return r;
9478
9479 for (i = 0; i < 100; i++) {
9480 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9481 break;
9482 mdelay(10);
9483 }
9484 if (i == 100)
9485 return -ETIMEDOUT;
9486
9487 tmp = RREG32_SMC(CG_ECLK_CNTL);
9488 tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9489 tmp |= dividers.post_divider;
9490 WREG32_SMC(CG_ECLK_CNTL, tmp);
9491
9492 for (i = 0; i < 100; i++) {
9493 if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9494 break;
9495 mdelay(10);
9496 }
9497 if (i == 100)
9498 return -ETIMEDOUT;
9499
9500 return 0;
9501}
9502
9503static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9504{
9505 struct pci_dev *root = rdev->pdev->bus->self;
9506 enum pci_bus_speed speed_cap;
9507 int bridge_pos, gpu_pos;
9508 u32 speed_cntl, current_data_rate;
9509 int i;
9510 u16 tmp16;
9511
9512 if (pci_is_root_bus(rdev->pdev->bus))
9513 return;
9514
9515 if (radeon_pcie_gen2 == 0)
9516 return;
9517
9518 if (rdev->flags & RADEON_IS_IGP)
9519 return;
9520
9521 if (!(rdev->flags & RADEON_IS_PCIE))
9522 return;
9523
9524 speed_cap = pcie_get_speed_cap(root);
9525 if (speed_cap == PCI_SPEED_UNKNOWN)
9526 return;
9527
9528 if ((speed_cap != PCIE_SPEED_8_0GT) &&
9529 (speed_cap != PCIE_SPEED_5_0GT))
9530 return;
9531
9532 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9533 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9534 LC_CURRENT_DATA_RATE_SHIFT;
9535 if (speed_cap == PCIE_SPEED_8_0GT) {
9536 if (current_data_rate == 2) {
9537 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9538 return;
9539 }
9540 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9541 } else if (speed_cap == PCIE_SPEED_5_0GT) {
9542 if (current_data_rate == 1) {
9543 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9544 return;
9545 }
9546 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9547 }
9548
9549 bridge_pos = pci_pcie_cap(root);
9550 if (!bridge_pos)
9551 return;
9552
9553 gpu_pos = pci_pcie_cap(rdev->pdev);
9554 if (!gpu_pos)
9555 return;
9556
9557 if (speed_cap == PCIE_SPEED_8_0GT) {
9558 /* re-try equalization if gen3 is not already enabled */
9559 if (current_data_rate != 2) {
9560 u16 bridge_cfg, gpu_cfg;
9561 u16 bridge_cfg2, gpu_cfg2;
9562 u32 max_lw, current_lw, tmp;
9563
9564 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9565 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9566
9567 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9568 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9569
9570 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9571 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9572
9573 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9574 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9575 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9576
9577 if (current_lw < max_lw) {
9578 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9579 if (tmp & LC_RENEGOTIATION_SUPPORT) {
9580 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9581 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9582 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9583 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9584 }
9585 }
9586
9587 for (i = 0; i < 10; i++) {
9588 /* check status */
9589 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9590 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9591 break;
9592
9593 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9594 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9595
9596 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9597 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9598
9599 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9600 tmp |= LC_SET_QUIESCE;
9601 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9602
9603 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9604 tmp |= LC_REDO_EQ;
9605 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9606
9607 msleep(100);
9608
9609 /* linkctl */
9610 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9611 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9612 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9613 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9614
9615 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9616 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9617 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9618 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9619
9620 /* linkctl2 */
9621 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9622 tmp16 &= ~((1 << 4) | (7 << 9));
9623 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9624 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9625
9626 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9627 tmp16 &= ~((1 << 4) | (7 << 9));
9628 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9629 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9630
9631 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9632 tmp &= ~LC_SET_QUIESCE;
9633 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9634 }
9635 }
9636 }
9637
9638 /* set the link speed */
9639 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9640 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9641 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9642
9643 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9644 tmp16 &= ~0xf;
9645 if (speed_cap == PCIE_SPEED_8_0GT)
9646 tmp16 |= 3; /* gen3 */
9647 else if (speed_cap == PCIE_SPEED_5_0GT)
9648 tmp16 |= 2; /* gen2 */
9649 else
9650 tmp16 |= 1; /* gen1 */
9651 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9652
9653 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9654 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9655 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9656
9657 for (i = 0; i < rdev->usec_timeout; i++) {
9658 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9659 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9660 break;
9661 udelay(1);
9662 }
9663}
9664
9665static void cik_program_aspm(struct radeon_device *rdev)
9666{
9667 u32 data, orig;
9668 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9669 bool disable_clkreq = false;
9670
9671 if (radeon_aspm == 0)
9672 return;
9673
9674 /* XXX double check IGPs */
9675 if (rdev->flags & RADEON_IS_IGP)
9676 return;
9677
9678 if (!(rdev->flags & RADEON_IS_PCIE))
9679 return;
9680
9681 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9682 data &= ~LC_XMIT_N_FTS_MASK;
9683 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9684 if (orig != data)
9685 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9686
9687 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9688 data |= LC_GO_TO_RECOVERY;
9689 if (orig != data)
9690 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9691
9692 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9693 data |= P_IGNORE_EDB_ERR;
9694 if (orig != data)
9695 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9696
9697 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9698 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9699 data |= LC_PMI_TO_L1_DIS;
9700 if (!disable_l0s)
9701 data |= LC_L0S_INACTIVITY(7);
9702
9703 if (!disable_l1) {
9704 data |= LC_L1_INACTIVITY(7);
9705 data &= ~LC_PMI_TO_L1_DIS;
9706 if (orig != data)
9707 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9708
9709 if (!disable_plloff_in_l1) {
9710 bool clk_req_support;
9711
9712 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9713 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9714 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9715 if (orig != data)
9716 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9717
9718 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9719 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9720 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9721 if (orig != data)
9722 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9723
9724 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9725 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9726 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9727 if (orig != data)
9728 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9729
9730 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9731 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9732 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9733 if (orig != data)
9734 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9735
9736 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9737 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9738 data |= LC_DYN_LANES_PWR_STATE(3);
9739 if (orig != data)
9740 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9741
9742 if (!disable_clkreq &&
9743 !pci_is_root_bus(rdev->pdev->bus)) {
9744 struct pci_dev *root = rdev->pdev->bus->self;
9745 u32 lnkcap;
9746
9747 clk_req_support = false;
9748 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9749 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9750 clk_req_support = true;
9751 } else {
9752 clk_req_support = false;
9753 }
9754
9755 if (clk_req_support) {
9756 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9757 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9758 if (orig != data)
9759 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9760
9761 orig = data = RREG32_SMC(THM_CLK_CNTL);
9762 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9763 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9764 if (orig != data)
9765 WREG32_SMC(THM_CLK_CNTL, data);
9766
9767 orig = data = RREG32_SMC(MISC_CLK_CTRL);
9768 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9769 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9770 if (orig != data)
9771 WREG32_SMC(MISC_CLK_CTRL, data);
9772
9773 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9774 data &= ~BCLK_AS_XCLK;
9775 if (orig != data)
9776 WREG32_SMC(CG_CLKPIN_CNTL, data);
9777
9778 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9779 data &= ~FORCE_BIF_REFCLK_EN;
9780 if (orig != data)
9781 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9782
9783 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9784 data &= ~MPLL_CLKOUT_SEL_MASK;
9785 data |= MPLL_CLKOUT_SEL(4);
9786 if (orig != data)
9787 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9788 }
9789 }
9790 } else {
9791 if (orig != data)
9792 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9793 }
9794
9795 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9796 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9797 if (orig != data)
9798 WREG32_PCIE_PORT(PCIE_CNTL2, data);
9799
9800 if (!disable_l0s) {
9801 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9802 if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9803 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9804 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9805 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9806 data &= ~LC_L0S_INACTIVITY_MASK;
9807 if (orig != data)
9808 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9809 }
9810 }
9811 }
9812}