Loading...
1/*
2 * PowerNV setup code.
3 *
4 * Copyright 2011 IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#undef DEBUG
13
14#include <linux/cpu.h>
15#include <linux/errno.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/tty.h>
19#include <linux/reboot.h>
20#include <linux/init.h>
21#include <linux/console.h>
22#include <linux/delay.h>
23#include <linux/irq.h>
24#include <linux/seq_file.h>
25#include <linux/of.h>
26#include <linux/of_fdt.h>
27#include <linux/interrupt.h>
28#include <linux/bug.h>
29#include <linux/pci.h>
30
31#include <asm/machdep.h>
32#include <asm/firmware.h>
33#include <asm/xics.h>
34#include <asm/rtas.h>
35#include <asm/opal.h>
36#include <asm/kexec.h>
37
38#include "powernv.h"
39
40static void __init pnv_setup_arch(void)
41{
42 /* Initialize SMP */
43 pnv_smp_init();
44
45 /* Setup PCI */
46 pnv_pci_init();
47
48 /* Setup RTC and NVRAM callbacks */
49 if (firmware_has_feature(FW_FEATURE_OPAL))
50 opal_nvram_init();
51
52 /* Enable NAP mode */
53 powersave_nap = 1;
54
55 /* XXX PMCS */
56}
57
58static void __init pnv_init_early(void)
59{
60 /*
61 * Initialize the LPC bus now so that legacy serial
62 * ports can be found on it
63 */
64 opal_lpc_init();
65
66#ifdef CONFIG_HVC_OPAL
67 if (firmware_has_feature(FW_FEATURE_OPAL))
68 hvc_opal_init_early();
69 else
70#endif
71 add_preferred_console("hvc", 0, NULL);
72}
73
74static void __init pnv_init_IRQ(void)
75{
76 xics_init();
77
78 WARN_ON(!ppc_md.get_irq);
79}
80
81static void pnv_show_cpuinfo(struct seq_file *m)
82{
83 struct device_node *root;
84 const char *model = "";
85
86 root = of_find_node_by_path("/");
87 if (root)
88 model = of_get_property(root, "model", NULL);
89 seq_printf(m, "machine\t\t: PowerNV %s\n", model);
90 if (firmware_has_feature(FW_FEATURE_OPALv3))
91 seq_printf(m, "firmware\t: OPAL v3\n");
92 else if (firmware_has_feature(FW_FEATURE_OPALv2))
93 seq_printf(m, "firmware\t: OPAL v2\n");
94 else if (firmware_has_feature(FW_FEATURE_OPAL))
95 seq_printf(m, "firmware\t: OPAL v1\n");
96 else
97 seq_printf(m, "firmware\t: BML\n");
98 of_node_put(root);
99}
100
101static void __noreturn pnv_restart(char *cmd)
102{
103 long rc = OPAL_BUSY;
104
105 opal_notifier_disable();
106
107 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
108 rc = opal_cec_reboot();
109 if (rc == OPAL_BUSY_EVENT)
110 opal_poll_events(NULL);
111 else
112 mdelay(10);
113 }
114 for (;;)
115 opal_poll_events(NULL);
116}
117
118static void __noreturn pnv_power_off(void)
119{
120 long rc = OPAL_BUSY;
121
122 opal_notifier_disable();
123
124 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
125 rc = opal_cec_power_down(0);
126 if (rc == OPAL_BUSY_EVENT)
127 opal_poll_events(NULL);
128 else
129 mdelay(10);
130 }
131 for (;;)
132 opal_poll_events(NULL);
133}
134
135static void __noreturn pnv_halt(void)
136{
137 pnv_power_off();
138}
139
/*
 * progress machine hook: intentionally does nothing.
 *
 * @s:   progress message (ignored).
 * @hex: progress code (ignored).
 */
static void pnv_progress(char *s, unsigned short hex)
{
	(void)s;
	(void)hex;
}
143
144static int pnv_dma_set_mask(struct device *dev, u64 dma_mask)
145{
146 if (dev_is_pci(dev))
147 return pnv_pci_dma_set_mask(to_pci_dev(dev), dma_mask);
148 return __dma_set_mask(dev, dma_mask);
149}
150
/*
 * machine_shutdown hook: shuts down the PCI side first, then OPAL.
 */
static void pnv_shutdown(void)
{
	/* The PCI code gets to clear up its IODA tables */
	pnv_pci_shutdown();

	/*
	 * Quiesce OPAL: all OPAL interrupts are unregistered so none fire
	 * during kexec, and any operation that may still be doing DMA
	 * (dump retrieval, for instance) is completed.
	 */
	opal_shutdown();
}
163
164#ifdef CONFIG_KEXEC
165static void pnv_kexec_wait_secondaries_down(void)
166{
167 int my_cpu, i, notified = -1;
168
169 my_cpu = get_cpu();
170
171 for_each_online_cpu(i) {
172 uint8_t status;
173 int64_t rc;
174
175 if (i == my_cpu)
176 continue;
177
178 for (;;) {
179 rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
180 &status);
181 if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
182 break;
183 barrier();
184 if (i != notified) {
185 printk(KERN_INFO "kexec: waiting for cpu %d "
186 "(physical %d) to enter OPAL\n",
187 i, paca[i].hw_cpu_id);
188 notified = i;
189 }
190 }
191 }
192}
193
194static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
195{
196 xics_kexec_teardown_cpu(secondary);
197
198 /* On OPAL v3, we return all CPUs to firmware */
199
200 if (!firmware_has_feature(FW_FEATURE_OPALv3))
201 return;
202
203 if (secondary) {
204 /* Return secondary CPUs to firmware on OPAL v3 */
205 mb();
206 get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
207 mb();
208
209 /* Return the CPU to OPAL */
210 opal_return_cpu();
211 } else if (crash_shutdown) {
212 /*
213 * On crash, we don't wait for secondaries to go
214 * down as they might be unreachable or hung, so
215 * instead we just wait a bit and move on.
216 */
217 mdelay(1);
218 } else {
219 /* Primary waits for the secondaries to have reached OPAL */
220 pnv_kexec_wait_secondaries_down();
221 }
222}
223#endif /* CONFIG_KEXEC */
224
225static void __init pnv_setup_machdep_opal(void)
226{
227 ppc_md.get_boot_time = opal_get_boot_time;
228 ppc_md.get_rtc_time = opal_get_rtc_time;
229 ppc_md.set_rtc_time = opal_set_rtc_time;
230 ppc_md.restart = pnv_restart;
231 ppc_md.power_off = pnv_power_off;
232 ppc_md.halt = pnv_halt;
233 ppc_md.machine_check_exception = opal_machine_check;
234 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
235}
236
237#ifdef CONFIG_PPC_POWERNV_RTAS
238static void __init pnv_setup_machdep_rtas(void)
239{
240 if (rtas_token("get-time-of-day") != RTAS_UNKNOWN_SERVICE) {
241 ppc_md.get_boot_time = rtas_get_boot_time;
242 ppc_md.get_rtc_time = rtas_get_rtc_time;
243 ppc_md.set_rtc_time = rtas_set_rtc_time;
244 }
245 ppc_md.restart = rtas_restart;
246 ppc_md.power_off = rtas_power_off;
247 ppc_md.halt = rtas_halt;
248}
249#endif /* CONFIG_PPC_POWERNV_RTAS */
250
251static int __init pnv_probe(void)
252{
253 unsigned long root = of_get_flat_dt_root();
254
255 if (!of_flat_dt_is_compatible(root, "ibm,powernv"))
256 return 0;
257
258 hpte_init_native();
259
260 if (firmware_has_feature(FW_FEATURE_OPAL))
261 pnv_setup_machdep_opal();
262#ifdef CONFIG_PPC_POWERNV_RTAS
263 else if (rtas.base)
264 pnv_setup_machdep_rtas();
265#endif /* CONFIG_PPC_POWERNV_RTAS */
266
267 pr_debug("PowerNV detected !\n");
268
269 return 1;
270}
271
272define_machine(powernv) {
273 .name = "PowerNV",
274 .probe = pnv_probe,
275 .init_early = pnv_init_early,
276 .setup_arch = pnv_setup_arch,
277 .init_IRQ = pnv_init_IRQ,
278 .show_cpuinfo = pnv_show_cpuinfo,
279 .progress = pnv_progress,
280 .machine_shutdown = pnv_shutdown,
281 .power_save = power7_idle,
282 .calibrate_decr = generic_calibrate_decr,
283 .dma_set_mask = pnv_dma_set_mask,
284#ifdef CONFIG_KEXEC
285 .kexec_cpu_down = pnv_kexec_cpu_down,
286#endif
287};
1/*
2 * PowerNV setup code.
3 *
4 * Copyright 2011 IBM Corp.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#undef DEBUG
13
14#include <linux/cpu.h>
15#include <linux/errno.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/tty.h>
19#include <linux/reboot.h>
20#include <linux/init.h>
21#include <linux/console.h>
22#include <linux/delay.h>
23#include <linux/irq.h>
24#include <linux/seq_file.h>
25#include <linux/of.h>
26#include <linux/of_fdt.h>
27#include <linux/interrupt.h>
28#include <linux/bug.h>
29#include <linux/pci.h>
30#include <linux/cpufreq.h>
31
32#include <asm/machdep.h>
33#include <asm/firmware.h>
34#include <asm/xics.h>
35#include <asm/xive.h>
36#include <asm/opal.h>
37#include <asm/kexec.h>
38#include <asm/smp.h>
39#include <asm/tm.h>
40#include <asm/setup.h>
41#include <asm/security_features.h>
42
43#include "powernv.h"
44
45
46static bool fw_feature_is(const char *state, const char *name,
47 struct device_node *fw_features)
48{
49 struct device_node *np;
50 bool rc = false;
51
52 np = of_get_child_by_name(fw_features, name);
53 if (np) {
54 rc = of_property_read_bool(np, state);
55 of_node_put(np);
56 }
57
58 return rc;
59}
60
61static void init_fw_feat_flags(struct device_node *np)
62{
63 if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
64 security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
65
66 if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
67 security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
68
69 if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
70 security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
71
72 if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
73 security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
74
75 if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
76 security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
77
78 if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
79 security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
80
81 /*
82 * The features below are enabled by default, so we instead look to see
83 * if firmware has *disabled* them, and clear them if so.
84 */
85 if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
86 security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
87
88 if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
89 security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
90
91 if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
92 security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
93
94 if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
95 security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
96}
97
98static void pnv_setup_rfi_flush(void)
99{
100 struct device_node *np, *fw_features;
101 enum l1d_flush_type type;
102 bool enable;
103
104 /* Default to fallback in case fw-features are not available */
105 type = L1D_FLUSH_FALLBACK;
106
107 np = of_find_node_by_name(NULL, "ibm,opal");
108 fw_features = of_get_child_by_name(np, "fw-features");
109 of_node_put(np);
110
111 if (fw_features) {
112 init_fw_feat_flags(fw_features);
113 of_node_put(fw_features);
114
115 if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
116 type = L1D_FLUSH_MTTRIG;
117
118 if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
119 type = L1D_FLUSH_ORI;
120 }
121
122 enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
123 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
124 security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
125
126 setup_rfi_flush(type, enable);
127}
128
129static void __init pnv_setup_arch(void)
130{
131 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
132
133 pnv_setup_rfi_flush();
134 setup_stf_barrier();
135
136 /* Initialize SMP */
137 pnv_smp_init();
138
139 /* Setup PCI */
140 pnv_pci_init();
141
142 /* Setup RTC and NVRAM callbacks */
143 if (firmware_has_feature(FW_FEATURE_OPAL))
144 opal_nvram_init();
145
146 /* Enable NAP mode */
147 powersave_nap = 1;
148
149 /* XXX PMCS */
150}
151
152static void __init pnv_init(void)
153{
154 /*
155 * Initialize the LPC bus now so that legacy serial
156 * ports can be found on it
157 */
158 opal_lpc_init();
159
160#ifdef CONFIG_HVC_OPAL
161 if (firmware_has_feature(FW_FEATURE_OPAL))
162 hvc_opal_init_early();
163 else
164#endif
165 add_preferred_console("hvc", 0, NULL);
166}
167
168static void __init pnv_init_IRQ(void)
169{
170 /* Try using a XIVE if available, otherwise use a XICS */
171 if (!xive_native_init())
172 xics_init();
173
174 WARN_ON(!ppc_md.get_irq);
175}
176
177static void pnv_show_cpuinfo(struct seq_file *m)
178{
179 struct device_node *root;
180 const char *model = "";
181
182 root = of_find_node_by_path("/");
183 if (root)
184 model = of_get_property(root, "model", NULL);
185 seq_printf(m, "machine\t\t: PowerNV %s\n", model);
186 if (firmware_has_feature(FW_FEATURE_OPAL))
187 seq_printf(m, "firmware\t: OPAL\n");
188 else
189 seq_printf(m, "firmware\t: BML\n");
190 of_node_put(root);
191 if (radix_enabled())
192 seq_printf(m, "MMU\t\t: Radix\n");
193 else
194 seq_printf(m, "MMU\t\t: Hash\n");
195}
196
/*
 * Common preparation for restart and power-off: shuts down OPAL event
 * handling, prints any pending flash update message, stops the other
 * CPUs and hard-disables interrupts, in that order.
 */
static void pnv_prepare_going_down(void)
{
	/*
	 * Interrupts can no longer be serviced from here on, so all
	 * notifiers coming from OPAL are turned off.
	 */
	opal_event_shutdown();

	/* If a flash update has been scheduled, tell the user about it */
	opal_flash_update_print_message();

	smp_send_stop();

	hard_irq_disable();
}
212
213static void __noreturn pnv_restart(char *cmd)
214{
215 long rc = OPAL_BUSY;
216
217 pnv_prepare_going_down();
218
219 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
220 rc = opal_cec_reboot();
221 if (rc == OPAL_BUSY_EVENT)
222 opal_poll_events(NULL);
223 else
224 mdelay(10);
225 }
226 for (;;)
227 opal_poll_events(NULL);
228}
229
230static void __noreturn pnv_power_off(void)
231{
232 long rc = OPAL_BUSY;
233
234 pnv_prepare_going_down();
235
236 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
237 rc = opal_cec_power_down(0);
238 if (rc == OPAL_BUSY_EVENT)
239 opal_poll_events(NULL);
240 else
241 mdelay(10);
242 }
243 for (;;)
244 opal_poll_events(NULL);
245}
246
247static void __noreturn pnv_halt(void)
248{
249 pnv_power_off();
250}
251
/*
 * progress machine hook: intentionally does nothing.
 *
 * @s:   progress message (ignored).
 * @hex: progress code (ignored).
 */
static void pnv_progress(char *s, unsigned short hex)
{
	(void)s;
	(void)hex;
}
255
/*
 * machine_shutdown hook: shuts down the PCI side first, then OPAL.
 */
static void pnv_shutdown(void)
{
	/* The PCI code gets to clear up its IODA tables */
	pnv_pci_shutdown();

	/*
	 * Quiesce OPAL: all OPAL interrupts are unregistered so none fire
	 * during kexec, and any operation that may still be doing DMA
	 * (dump retrieval, for instance) is completed.
	 */
	opal_shutdown();
}
268
269#ifdef CONFIG_KEXEC_CORE
270static void pnv_kexec_wait_secondaries_down(void)
271{
272 int my_cpu, i, notified = -1;
273
274 my_cpu = get_cpu();
275
276 for_each_online_cpu(i) {
277 uint8_t status;
278 int64_t rc, timeout = 1000;
279
280 if (i == my_cpu)
281 continue;
282
283 for (;;) {
284 rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
285 &status);
286 if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
287 break;
288 barrier();
289 if (i != notified) {
290 printk(KERN_INFO "kexec: waiting for cpu %d "
291 "(physical %d) to enter OPAL\n",
292 i, paca_ptrs[i]->hw_cpu_id);
293 notified = i;
294 }
295
296 /*
297 * On crash secondaries might be unreachable or hung,
298 * so timeout if we've waited too long
299 * */
300 mdelay(1);
301 if (timeout-- == 0) {
302 printk(KERN_ERR "kexec: timed out waiting for "
303 "cpu %d (physical %d) to enter OPAL\n",
304 i, paca_ptrs[i]->hw_cpu_id);
305 break;
306 }
307 }
308 }
309}
310
311static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
312{
313 u64 reinit_flags;
314
315 if (xive_enabled())
316 xive_kexec_teardown_cpu(secondary);
317 else
318 xics_kexec_teardown_cpu(secondary);
319
320 /* On OPAL, we return all CPUs to firmware */
321 if (!firmware_has_feature(FW_FEATURE_OPAL))
322 return;
323
324 if (secondary) {
325 /* Return secondary CPUs to firmware on OPAL v3 */
326 mb();
327 get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
328 mb();
329
330 /* Return the CPU to OPAL */
331 opal_return_cpu();
332 } else {
333 /* Primary waits for the secondaries to have reached OPAL */
334 pnv_kexec_wait_secondaries_down();
335
336 /* Switch XIVE back to emulation mode */
337 if (xive_enabled())
338 xive_shutdown();
339
340 /*
341 * We might be running as little-endian - now that interrupts
342 * are disabled, reset the HILE bit to big-endian so we don't
343 * take interrupts in the wrong endian later
344 *
345 * We reinit to enable both radix and hash on P9 to ensure
346 * the mode used by the next kernel is always supported.
347 */
348 reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
349 if (cpu_has_feature(CPU_FTR_ARCH_300))
350 reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
351 OPAL_REINIT_CPUS_MMU_HASH;
352 opal_reinit_cpus(reinit_flags);
353 }
354}
355#endif /* CONFIG_KEXEC_CORE */
356
357#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/*
 * memory_block_size machine hook.
 *
 * Returns 1GB when radix is enabled and 256MB otherwise. On radix the
 * kernel linear region is mapped with 1GB large pages, so the memory
 * block size has to be at least that big for hot unplug to work.
 */
static unsigned long pnv_memory_block_size(void)
{
	const unsigned long radix_block = 1UL << 30;	/* 1GB */
	const unsigned long hash_block = 256UL << 20;	/* 256MB */

	return radix_enabled() ? radix_block : hash_block;
}
370#endif
371
372static void __init pnv_setup_machdep_opal(void)
373{
374 ppc_md.get_boot_time = opal_get_boot_time;
375 ppc_md.restart = pnv_restart;
376 pm_power_off = pnv_power_off;
377 ppc_md.halt = pnv_halt;
378 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
379 ppc_md.machine_check_exception = opal_machine_check;
380 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
381 ppc_md.hmi_exception_early = opal_hmi_exception_early;
382 ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
383}
384
385static int __init pnv_probe(void)
386{
387 if (!of_machine_is_compatible("ibm,powernv"))
388 return 0;
389
390 if (firmware_has_feature(FW_FEATURE_OPAL))
391 pnv_setup_machdep_opal();
392
393 pr_debug("PowerNV detected !\n");
394
395 pnv_init();
396
397 return 1;
398}
399
400#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
401void __init pnv_tm_init(void)
402{
403 if (!firmware_has_feature(FW_FEATURE_OPAL) ||
404 !pvr_version_is(PVR_POWER9) ||
405 early_cpu_has_feature(CPU_FTR_TM))
406 return;
407
408 if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
409 return;
410
411 pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
412 cur_cpu_spec->cpu_features |= CPU_FTR_TM;
413 /* Make sure "normal" HTM is off (it should be) */
414 cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
415 /* Turn on no suspend mode, and HTM no SC */
416 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
417 PPC_FEATURE2_HTM_NOSC;
418 tm_suspend_disabled = true;
419}
420#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
421
422/*
423 * Returns the cpu frequency for 'cpu' in Hz. This is used by
424 * /proc/cpuinfo
425 */
426static unsigned long pnv_get_proc_freq(unsigned int cpu)
427{
428 unsigned long ret_freq;
429
430 ret_freq = cpufreq_get(cpu) * 1000ul;
431
432 /*
433 * If the backend cpufreq driver does not exist,
434 * then fallback to old way of reporting the clockrate.
435 */
436 if (!ret_freq)
437 ret_freq = ppc_proc_freq;
438 return ret_freq;
439}
440
441define_machine(powernv) {
442 .name = "PowerNV",
443 .probe = pnv_probe,
444 .setup_arch = pnv_setup_arch,
445 .init_IRQ = pnv_init_IRQ,
446 .show_cpuinfo = pnv_show_cpuinfo,
447 .get_proc_freq = pnv_get_proc_freq,
448 .progress = pnv_progress,
449 .machine_shutdown = pnv_shutdown,
450 .power_save = NULL,
451 .calibrate_decr = generic_calibrate_decr,
452#ifdef CONFIG_KEXEC_CORE
453 .kexec_cpu_down = pnv_kexec_cpu_down,
454#endif
455#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
456 .memory_block_size = pnv_memory_block_size,
457#endif
458};