Loading...
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Architecture neutral utility routines for interacting with
5 * Hyper-V. This file is specifically for code that must be
6 * built-in to the kernel image when CONFIG_HYPERV is set
7 * (vs. being in a module) because it is called from architecture
8 * specific code under arch/.
9 *
10 * Copyright (C) 2021, Microsoft, Inc.
11 *
12 * Author : Michael Kelley <mikelley@microsoft.com>
13 */
14
15#include <linux/types.h>
16#include <linux/acpi.h>
17#include <linux/export.h>
18#include <linux/bitfield.h>
19#include <linux/cpumask.h>
20#include <linux/sched/task_stack.h>
21#include <linux/panic_notifier.h>
22#include <linux/ptrace.h>
23#include <linux/kdebug.h>
24#include <linux/kmsg_dump.h>
25#include <linux/slab.h>
26#include <linux/dma-map-ops.h>
27#include <linux/set_memory.h>
28#include <asm/hyperv-tlfs.h>
29#include <asm/mshyperv.h>
30
31/*
32 * hv_root_partition, ms_hyperv and hv_nested are defined here with other
33 * Hyper-V specific globals so they are shared across all architectures and are
34 * built only when CONFIG_HYPERV is defined. But on x86,
35 * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
36 * defined, and it uses these three variables. So mark them as __weak
37 * here, allowing for an overriding definition in the module containing
38 * ms_hyperv_init_platform().
39 */
40bool __weak hv_root_partition;
41EXPORT_SYMBOL_GPL(hv_root_partition);
42
43bool __weak hv_nested;
44EXPORT_SYMBOL_GPL(hv_nested);
45
46struct ms_hyperv_info __weak ms_hyperv;
47EXPORT_SYMBOL_GPL(ms_hyperv);
48
49u32 *hv_vp_index;
50EXPORT_SYMBOL_GPL(hv_vp_index);
51
52u32 hv_max_vp_index;
53EXPORT_SYMBOL_GPL(hv_max_vp_index);
54
55void * __percpu *hyperv_pcpu_input_arg;
56EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
57
58void * __percpu *hyperv_pcpu_output_arg;
59EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
60
61static void hv_kmsg_dump_unregister(void);
62
63static struct ctl_table_header *hv_ctl_table_hdr;
64
65/*
66 * Hyper-V specific initialization and shutdown code that is
67 * common across all architectures. Called from architecture
68 * specific initialization functions.
69 */
70
71void __init hv_common_free(void)
72{
73 unregister_sysctl_table(hv_ctl_table_hdr);
74 hv_ctl_table_hdr = NULL;
75
76 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
77 hv_kmsg_dump_unregister();
78
79 kfree(hv_vp_index);
80 hv_vp_index = NULL;
81
82 free_percpu(hyperv_pcpu_output_arg);
83 hyperv_pcpu_output_arg = NULL;
84
85 free_percpu(hyperv_pcpu_input_arg);
86 hyperv_pcpu_input_arg = NULL;
87}
88
89/*
90 * Functions for allocating and freeing memory with size and
91 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
92 * the guest page size may not be the same as the Hyper-V page
93 * size. We depend upon kmalloc() aligning power-of-two size
94 * allocations to the allocation size boundary, so that the
95 * allocated memory appears to Hyper-V as a page of the size
96 * it expects.
97 */
98
99void *hv_alloc_hyperv_page(void)
100{
101 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
102
103 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
104 return (void *)__get_free_page(GFP_KERNEL);
105 else
106 return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
107}
108EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
109
110void *hv_alloc_hyperv_zeroed_page(void)
111{
112 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
113 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
114 else
115 return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
116}
117EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
118
119void hv_free_hyperv_page(void *addr)
120{
121 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
122 free_page((unsigned long)addr);
123 else
124 kfree(addr);
125}
126EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
127
128static void *hv_panic_page;
129
130/*
131 * Boolean to control whether to report panic messages over Hyper-V.
132 *
133 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
134 */
135static int sysctl_record_panic_msg = 1;
136
137/*
138 * sysctl option to allow the user to control whether kmsg data should be
139 * reported to Hyper-V on panic.
140 */
141static struct ctl_table hv_ctl_table[] = {
142 {
143 .procname = "hyperv_record_panic_msg",
144 .data = &sysctl_record_panic_msg,
145 .maxlen = sizeof(int),
146 .mode = 0644,
147 .proc_handler = proc_dointvec_minmax,
148 .extra1 = SYSCTL_ZERO,
149 .extra2 = SYSCTL_ONE
150 },
151};
152
153static int hv_die_panic_notify_crash(struct notifier_block *self,
154 unsigned long val, void *args);
155
156static struct notifier_block hyperv_die_report_block = {
157 .notifier_call = hv_die_panic_notify_crash,
158};
159
160static struct notifier_block hyperv_panic_report_block = {
161 .notifier_call = hv_die_panic_notify_crash,
162};
163
164/*
165 * The following callback works both as die and panic notifier; its
166 * goal is to provide panic information to the hypervisor unless the
167 * kmsg dumper is used [see hv_kmsg_dump()], which provides more
168 * information but isn't always available.
169 *
170 * Notice that both the panic/die report notifiers are registered only
171 * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
172 */
173static int hv_die_panic_notify_crash(struct notifier_block *self,
174 unsigned long val, void *args)
175{
176 struct pt_regs *regs;
177 bool is_die;
178
179 /* Don't notify Hyper-V unless we have a die oops event or panic. */
180 if (self == &hyperv_panic_report_block) {
181 is_die = false;
182 regs = current_pt_regs();
183 } else { /* die event */
184 if (val != DIE_OOPS)
185 return NOTIFY_DONE;
186
187 is_die = true;
188 regs = ((struct die_args *)args)->regs;
189 }
190
191 /*
192 * Hyper-V should be notified only once about a panic/die. If we will
193 * be calling hv_kmsg_dump() later with kmsg data, don't do the
194 * notification here.
195 */
196 if (!sysctl_record_panic_msg || !hv_panic_page)
197 hyperv_report_panic(regs, val, is_die);
198
199 return NOTIFY_DONE;
200}
201
202/*
203 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
204 * buffer and call into Hyper-V to transfer the data.
205 */
206static void hv_kmsg_dump(struct kmsg_dumper *dumper,
207 enum kmsg_dump_reason reason)
208{
209 struct kmsg_dump_iter iter;
210 size_t bytes_written;
211
212 /* We are only interested in panics. */
213 if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
214 return;
215
216 /*
217 * Write dump contents to the page. No need to synchronize; panic should
218 * be single-threaded.
219 */
220 kmsg_dump_rewind(&iter);
221 kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
222 &bytes_written);
223 if (!bytes_written)
224 return;
225 /*
226 * P3 to contain the physical address of the panic page & P4 to
227 * contain the size of the panic data in that page. Rest of the
228 * registers are no-op when the NOTIFY_MSG flag is set.
229 */
230 hv_set_register(HV_REGISTER_CRASH_P0, 0);
231 hv_set_register(HV_REGISTER_CRASH_P1, 0);
232 hv_set_register(HV_REGISTER_CRASH_P2, 0);
233 hv_set_register(HV_REGISTER_CRASH_P3, virt_to_phys(hv_panic_page));
234 hv_set_register(HV_REGISTER_CRASH_P4, bytes_written);
235
236 /*
237 * Let Hyper-V know there is crash data available along with
238 * the panic message.
239 */
240 hv_set_register(HV_REGISTER_CRASH_CTL,
241 (HV_CRASH_CTL_CRASH_NOTIFY |
242 HV_CRASH_CTL_CRASH_NOTIFY_MSG));
243}
244
245static struct kmsg_dumper hv_kmsg_dumper = {
246 .dump = hv_kmsg_dump,
247};
248
249static void hv_kmsg_dump_unregister(void)
250{
251 kmsg_dump_unregister(&hv_kmsg_dumper);
252 unregister_die_notifier(&hyperv_die_report_block);
253 atomic_notifier_chain_unregister(&panic_notifier_list,
254 &hyperv_panic_report_block);
255
256 hv_free_hyperv_page(hv_panic_page);
257 hv_panic_page = NULL;
258}
259
260static void hv_kmsg_dump_register(void)
261{
262 int ret;
263
264 hv_panic_page = hv_alloc_hyperv_zeroed_page();
265 if (!hv_panic_page) {
266 pr_err("Hyper-V: panic message page memory allocation failed\n");
267 return;
268 }
269
270 ret = kmsg_dump_register(&hv_kmsg_dumper);
271 if (ret) {
272 pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
273 hv_free_hyperv_page(hv_panic_page);
274 hv_panic_page = NULL;
275 }
276}
277
278int __init hv_common_init(void)
279{
280 int i;
281
282 if (hv_is_isolation_supported())
283 sysctl_record_panic_msg = 0;
284
285 /*
286 * Hyper-V expects to get crash register data or kmsg when
287 * crash enlightment is available and system crashes. Set
288 * crash_kexec_post_notifiers to be true to make sure that
289 * calling crash enlightment interface before running kdump
290 * kernel.
291 */
292 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
293 u64 hyperv_crash_ctl;
294
295 crash_kexec_post_notifiers = true;
296 pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
297
298 /*
299 * Panic message recording (sysctl_record_panic_msg)
300 * is enabled by default in non-isolated guests and
301 * disabled by default in isolated guests; the panic
302 * message recording won't be available in isolated
303 * guests should the following registration fail.
304 */
305 hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
306 if (!hv_ctl_table_hdr)
307 pr_err("Hyper-V: sysctl table register error");
308
309 /*
310 * Register for panic kmsg callback only if the right
311 * capability is supported by the hypervisor.
312 */
313 hyperv_crash_ctl = hv_get_register(HV_REGISTER_CRASH_CTL);
314 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
315 hv_kmsg_dump_register();
316
317 register_die_notifier(&hyperv_die_report_block);
318 atomic_notifier_chain_register(&panic_notifier_list,
319 &hyperv_panic_report_block);
320 }
321
322 /*
323 * Allocate the per-CPU state for the hypercall input arg.
324 * If this allocation fails, we will not be able to setup
325 * (per-CPU) hypercall input page and thus this failure is
326 * fatal on Hyper-V.
327 */
328 hyperv_pcpu_input_arg = alloc_percpu(void *);
329 BUG_ON(!hyperv_pcpu_input_arg);
330
331 /* Allocate the per-CPU state for output arg for root */
332 if (hv_root_partition) {
333 hyperv_pcpu_output_arg = alloc_percpu(void *);
334 BUG_ON(!hyperv_pcpu_output_arg);
335 }
336
337 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
338 GFP_KERNEL);
339 if (!hv_vp_index) {
340 hv_common_free();
341 return -ENOMEM;
342 }
343
344 for (i = 0; i < num_possible_cpus(); i++)
345 hv_vp_index[i] = VP_INVAL;
346
347 return 0;
348}
349
350/*
351 * Hyper-V specific initialization and die code for
352 * individual CPUs that is common across all architectures.
353 * Called by the CPU hotplug mechanism.
354 */
355
356int hv_common_cpu_init(unsigned int cpu)
357{
358 void **inputarg, **outputarg;
359 u64 msr_vp_index;
360 gfp_t flags;
361 int pgcount = hv_root_partition ? 2 : 1;
362 void *mem;
363 int ret;
364
365 /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
366 flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
367
368 inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
369
370 /*
371 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
372 * allocated if this CPU was previously online and then taken offline
373 */
374 if (!*inputarg) {
375 mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
376 if (!mem)
377 return -ENOMEM;
378
379 if (hv_root_partition) {
380 outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
381 *outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
382 }
383
384 if (!ms_hyperv.paravisor_present &&
385 (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
386 ret = set_memory_decrypted((unsigned long)mem, pgcount);
387 if (ret) {
388 /* It may be unsafe to free 'mem' */
389 return ret;
390 }
391
392 memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
393 }
394
395 /*
396 * In a fully enlightened TDX/SNP VM with more than 64 VPs, if
397 * hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() ->
398 * ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to
399 * use hyperv_pcpu_input_arg as the hypercall input page, which
400 * must be a decrypted page in such a VM, but the page is still
401 * encrypted before set_memory_decrypted() returns. Fix this by
402 * setting *inputarg after the above set_memory_decrypted(): if
403 * hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns
404 * HV_STATUS_INVALID_PARAMETER immediately, and the function
405 * hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(),
406 * which may be slightly slower than the hypercall, but still
407 * works correctly in such a VM.
408 */
409 *inputarg = mem;
410 }
411
412 msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
413
414 hv_vp_index[cpu] = msr_vp_index;
415
416 if (msr_vp_index > hv_max_vp_index)
417 hv_max_vp_index = msr_vp_index;
418
419 return 0;
420}
421
/*
 * hv_common_cpu_die - Per-CPU Hyper-V teardown hook; intentionally a no-op.
 * @cpu: CPU going offline (unused)
 *
 * Always returns 0.
 */
int hv_common_cpu_die(unsigned int cpu)
{
	/*
	 * Deliberately keep the hyperv_pcpu_input_arg and
	 * hyperv_pcpu_output_arg memory: the Hyper-V vPCI driver may still
	 * use hyperv_pcpu_input_arg while reassigning interrupts during
	 * offlining, and that reassignment happens *after* the
	 * CPUHP_AP_HYPERV_ONLINE state has run and called this function.
	 *
	 * Should the CPU come back online, hv_common_cpu_init() reuses the
	 * memory allocated originally.
	 */

	return 0;
}
438
439/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
440bool hv_query_ext_cap(u64 cap_query)
441{
442 /*
443 * The address of the 'hv_extended_cap' variable will be used as an
444 * output parameter to the hypercall below and so it should be
445 * compatible with 'virt_to_phys'. Which means, it's address should be
446 * directly mapped. Use 'static' to keep it compatible; stack variables
447 * can be virtually mapped, making them incompatible with
448 * 'virt_to_phys'.
449 * Hypercall input/output addresses should also be 8-byte aligned.
450 */
451 static u64 hv_extended_cap __aligned(8);
452 static bool hv_extended_cap_queried;
453 u64 status;
454
455 /*
456 * Querying extended capabilities is an extended hypercall. Check if the
457 * partition supports extended hypercall, first.
458 */
459 if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
460 return false;
461
462 /* Extended capabilities do not change at runtime. */
463 if (hv_extended_cap_queried)
464 return hv_extended_cap & cap_query;
465
466 status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
467 &hv_extended_cap);
468
469 /*
470 * The query extended capabilities hypercall should not fail under
471 * any normal circumstances. Avoid repeatedly making the hypercall, on
472 * error.
473 */
474 hv_extended_cap_queried = true;
475 if (!hv_result_success(status)) {
476 pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
477 status);
478 return false;
479 }
480
481 return hv_extended_cap & cap_query;
482}
483EXPORT_SYMBOL_GPL(hv_query_ext_cap);
484
485void hv_setup_dma_ops(struct device *dev, bool coherent)
486{
487 /*
488 * Hyper-V does not offer a vIOMMU in the guest
489 * VM, so pass 0/NULL for the IOMMU settings
490 */
491 arch_setup_dma_ops(dev, 0, 0, coherent);
492}
493EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
494
495bool hv_is_hibernation_supported(void)
496{
497 return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
498}
499EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
500
501/*
502 * Default function to read the Hyper-V reference counter, independent
503 * of whether Hyper-V enlightened clocks/timers are being used. But on
504 * architectures where it is used, Hyper-V enlightenment code in
505 * hyperv_timer.c may override this function.
506 */
507static u64 __hv_read_ref_counter(void)
508{
509 return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
510}
511
512u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
513EXPORT_SYMBOL_GPL(hv_read_reference_counter);
514
515/* These __weak functions provide default "no-op" behavior and
516 * may be overridden by architecture specific versions. Architectures
517 * for which the default "no-op" behavior is sufficient can leave
518 * them unimplemented and not be cluttered with a bunch of stub
519 * functions in arch-specific code.
520 */
521
522bool __weak hv_is_isolation_supported(void)
523{
524 return false;
525}
526EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
527
528bool __weak hv_isolation_type_snp(void)
529{
530 return false;
531}
532EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
533
534bool __weak hv_isolation_type_tdx(void)
535{
536 return false;
537}
538EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
539
540void __weak hv_setup_vmbus_handler(void (*handler)(void))
541{
542}
543EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
544
545void __weak hv_remove_vmbus_handler(void)
546{
547}
548EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
549
550void __weak hv_setup_kexec_handler(void (*handler)(void))
551{
552}
553EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
554
555void __weak hv_remove_kexec_handler(void)
556{
557}
558EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
559
560void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
561{
562}
563EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
564
565void __weak hv_remove_crash_handler(void)
566{
567}
568EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
569
570void __weak hyperv_cleanup(void)
571{
572}
573EXPORT_SYMBOL_GPL(hyperv_cleanup);
574
575u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
576{
577 return HV_STATUS_INVALID_PARAMETER;
578}
579EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
580
581u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
582{
583 return HV_STATUS_INVALID_PARAMETER;
584}
585EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Architecture neutral utility routines for interacting with
5 * Hyper-V. This file is specifically for code that must be
6 * built-in to the kernel image when CONFIG_HYPERV is set
7 * (vs. being in a module) because it is called from architecture
8 * specific code under arch/.
9 *
10 * Copyright (C) 2021, Microsoft, Inc.
11 *
12 * Author : Michael Kelley <mikelley@microsoft.com>
13 */
14
15#include <linux/types.h>
16#include <linux/acpi.h>
17#include <linux/export.h>
18#include <linux/bitfield.h>
19#include <linux/cpumask.h>
20#include <linux/sched/task_stack.h>
21#include <linux/panic_notifier.h>
22#include <linux/ptrace.h>
23#include <linux/random.h>
24#include <linux/efi.h>
25#include <linux/kdebug.h>
26#include <linux/kmsg_dump.h>
27#include <linux/sizes.h>
28#include <linux/slab.h>
29#include <linux/dma-map-ops.h>
30#include <linux/set_memory.h>
31#include <asm/hyperv-tlfs.h>
32#include <asm/mshyperv.h>
33
34/*
35 * hv_root_partition, ms_hyperv and hv_nested are defined here with other
36 * Hyper-V specific globals so they are shared across all architectures and are
37 * built only when CONFIG_HYPERV is defined. But on x86,
38 * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
39 * defined, and it uses these three variables. So mark them as __weak
40 * here, allowing for an overriding definition in the module containing
41 * ms_hyperv_init_platform().
42 */
43bool __weak hv_root_partition;
44EXPORT_SYMBOL_GPL(hv_root_partition);
45
46bool __weak hv_nested;
47EXPORT_SYMBOL_GPL(hv_nested);
48
49struct ms_hyperv_info __weak ms_hyperv;
50EXPORT_SYMBOL_GPL(ms_hyperv);
51
52u32 *hv_vp_index;
53EXPORT_SYMBOL_GPL(hv_vp_index);
54
55u32 hv_max_vp_index;
56EXPORT_SYMBOL_GPL(hv_max_vp_index);
57
58void * __percpu *hyperv_pcpu_input_arg;
59EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
60
61void * __percpu *hyperv_pcpu_output_arg;
62EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
63
64static void hv_kmsg_dump_unregister(void);
65
66static struct ctl_table_header *hv_ctl_table_hdr;
67
68/*
69 * Hyper-V specific initialization and shutdown code that is
70 * common across all architectures. Called from architecture
71 * specific initialization functions.
72 */
73
74void __init hv_common_free(void)
75{
76 unregister_sysctl_table(hv_ctl_table_hdr);
77 hv_ctl_table_hdr = NULL;
78
79 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE)
80 hv_kmsg_dump_unregister();
81
82 kfree(hv_vp_index);
83 hv_vp_index = NULL;
84
85 free_percpu(hyperv_pcpu_output_arg);
86 hyperv_pcpu_output_arg = NULL;
87
88 free_percpu(hyperv_pcpu_input_arg);
89 hyperv_pcpu_input_arg = NULL;
90}
91
92/*
93 * Functions for allocating and freeing memory with size and
94 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
95 * the guest page size may not be the same as the Hyper-V page
96 * size. We depend upon kmalloc() aligning power-of-two size
97 * allocations to the allocation size boundary, so that the
98 * allocated memory appears to Hyper-V as a page of the size
99 * it expects.
100 */
101
102void *hv_alloc_hyperv_page(void)
103{
104 BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
105
106 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
107 return (void *)__get_free_page(GFP_KERNEL);
108 else
109 return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
110}
111EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
112
113void *hv_alloc_hyperv_zeroed_page(void)
114{
115 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
116 return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
117 else
118 return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
119}
120EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
121
122void hv_free_hyperv_page(void *addr)
123{
124 if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
125 free_page((unsigned long)addr);
126 else
127 kfree(addr);
128}
129EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
130
131static void *hv_panic_page;
132
133/*
134 * Boolean to control whether to report panic messages over Hyper-V.
135 *
136 * It can be set via /proc/sys/kernel/hyperv_record_panic_msg
137 */
138static int sysctl_record_panic_msg = 1;
139
140/*
141 * sysctl option to allow the user to control whether kmsg data should be
142 * reported to Hyper-V on panic.
143 */
144static struct ctl_table hv_ctl_table[] = {
145 {
146 .procname = "hyperv_record_panic_msg",
147 .data = &sysctl_record_panic_msg,
148 .maxlen = sizeof(int),
149 .mode = 0644,
150 .proc_handler = proc_dointvec_minmax,
151 .extra1 = SYSCTL_ZERO,
152 .extra2 = SYSCTL_ONE
153 },
154};
155
156static int hv_die_panic_notify_crash(struct notifier_block *self,
157 unsigned long val, void *args);
158
159static struct notifier_block hyperv_die_report_block = {
160 .notifier_call = hv_die_panic_notify_crash,
161};
162
163static struct notifier_block hyperv_panic_report_block = {
164 .notifier_call = hv_die_panic_notify_crash,
165};
166
167/*
168 * The following callback works both as die and panic notifier; its
169 * goal is to provide panic information to the hypervisor unless the
170 * kmsg dumper is used [see hv_kmsg_dump()], which provides more
171 * information but isn't always available.
172 *
173 * Notice that both the panic/die report notifiers are registered only
174 * if we have the capability HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE set.
175 */
176static int hv_die_panic_notify_crash(struct notifier_block *self,
177 unsigned long val, void *args)
178{
179 struct pt_regs *regs;
180 bool is_die;
181
182 /* Don't notify Hyper-V unless we have a die oops event or panic. */
183 if (self == &hyperv_panic_report_block) {
184 is_die = false;
185 regs = current_pt_regs();
186 } else { /* die event */
187 if (val != DIE_OOPS)
188 return NOTIFY_DONE;
189
190 is_die = true;
191 regs = ((struct die_args *)args)->regs;
192 }
193
194 /*
195 * Hyper-V should be notified only once about a panic/die. If we will
196 * be calling hv_kmsg_dump() later with kmsg data, don't do the
197 * notification here.
198 */
199 if (!sysctl_record_panic_msg || !hv_panic_page)
200 hyperv_report_panic(regs, val, is_die);
201
202 return NOTIFY_DONE;
203}
204
205/*
206 * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg
207 * buffer and call into Hyper-V to transfer the data.
208 */
209static void hv_kmsg_dump(struct kmsg_dumper *dumper,
210 struct kmsg_dump_detail *detail)
211{
212 struct kmsg_dump_iter iter;
213 size_t bytes_written;
214
215 /* We are only interested in panics. */
216 if (detail->reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg)
217 return;
218
219 /*
220 * Write dump contents to the page. No need to synchronize; panic should
221 * be single-threaded.
222 */
223 kmsg_dump_rewind(&iter);
224 kmsg_dump_get_buffer(&iter, false, hv_panic_page, HV_HYP_PAGE_SIZE,
225 &bytes_written);
226 if (!bytes_written)
227 return;
228 /*
229 * P3 to contain the physical address of the panic page & P4 to
230 * contain the size of the panic data in that page. Rest of the
231 * registers are no-op when the NOTIFY_MSG flag is set.
232 */
233 hv_set_msr(HV_MSR_CRASH_P0, 0);
234 hv_set_msr(HV_MSR_CRASH_P1, 0);
235 hv_set_msr(HV_MSR_CRASH_P2, 0);
236 hv_set_msr(HV_MSR_CRASH_P3, virt_to_phys(hv_panic_page));
237 hv_set_msr(HV_MSR_CRASH_P4, bytes_written);
238
239 /*
240 * Let Hyper-V know there is crash data available along with
241 * the panic message.
242 */
243 hv_set_msr(HV_MSR_CRASH_CTL,
244 (HV_CRASH_CTL_CRASH_NOTIFY |
245 HV_CRASH_CTL_CRASH_NOTIFY_MSG));
246}
247
248static struct kmsg_dumper hv_kmsg_dumper = {
249 .dump = hv_kmsg_dump,
250};
251
252static void hv_kmsg_dump_unregister(void)
253{
254 kmsg_dump_unregister(&hv_kmsg_dumper);
255 unregister_die_notifier(&hyperv_die_report_block);
256 atomic_notifier_chain_unregister(&panic_notifier_list,
257 &hyperv_panic_report_block);
258
259 hv_free_hyperv_page(hv_panic_page);
260 hv_panic_page = NULL;
261}
262
263static void hv_kmsg_dump_register(void)
264{
265 int ret;
266
267 hv_panic_page = hv_alloc_hyperv_zeroed_page();
268 if (!hv_panic_page) {
269 pr_err("Hyper-V: panic message page memory allocation failed\n");
270 return;
271 }
272
273 ret = kmsg_dump_register(&hv_kmsg_dumper);
274 if (ret) {
275 pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
276 hv_free_hyperv_page(hv_panic_page);
277 hv_panic_page = NULL;
278 }
279}
280
281int __init hv_common_init(void)
282{
283 int i;
284 union hv_hypervisor_version_info version;
285
286 /* Get information about the Hyper-V host version */
287 if (!hv_get_hypervisor_version(&version))
288 pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
289 version.major_version, version.minor_version,
290 version.build_number, version.service_number,
291 version.service_pack, version.service_branch);
292
293 if (hv_is_isolation_supported())
294 sysctl_record_panic_msg = 0;
295
296 /*
297 * Hyper-V expects to get crash register data or kmsg when
298 * crash enlightment is available and system crashes. Set
299 * crash_kexec_post_notifiers to be true to make sure that
300 * calling crash enlightment interface before running kdump
301 * kernel.
302 */
303 if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
304 u64 hyperv_crash_ctl;
305
306 crash_kexec_post_notifiers = true;
307 pr_info("Hyper-V: enabling crash_kexec_post_notifiers\n");
308
309 /*
310 * Panic message recording (sysctl_record_panic_msg)
311 * is enabled by default in non-isolated guests and
312 * disabled by default in isolated guests; the panic
313 * message recording won't be available in isolated
314 * guests should the following registration fail.
315 */
316 hv_ctl_table_hdr = register_sysctl("kernel", hv_ctl_table);
317 if (!hv_ctl_table_hdr)
318 pr_err("Hyper-V: sysctl table register error");
319
320 /*
321 * Register for panic kmsg callback only if the right
322 * capability is supported by the hypervisor.
323 */
324 hyperv_crash_ctl = hv_get_msr(HV_MSR_CRASH_CTL);
325 if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG)
326 hv_kmsg_dump_register();
327
328 register_die_notifier(&hyperv_die_report_block);
329 atomic_notifier_chain_register(&panic_notifier_list,
330 &hyperv_panic_report_block);
331 }
332
333 /*
334 * Allocate the per-CPU state for the hypercall input arg.
335 * If this allocation fails, we will not be able to setup
336 * (per-CPU) hypercall input page and thus this failure is
337 * fatal on Hyper-V.
338 */
339 hyperv_pcpu_input_arg = alloc_percpu(void *);
340 BUG_ON(!hyperv_pcpu_input_arg);
341
342 /* Allocate the per-CPU state for output arg for root */
343 if (hv_root_partition) {
344 hyperv_pcpu_output_arg = alloc_percpu(void *);
345 BUG_ON(!hyperv_pcpu_output_arg);
346 }
347
348 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
349 GFP_KERNEL);
350 if (!hv_vp_index) {
351 hv_common_free();
352 return -ENOMEM;
353 }
354
355 for (i = 0; i < num_possible_cpus(); i++)
356 hv_vp_index[i] = VP_INVAL;
357
358 return 0;
359}
360
361void __init ms_hyperv_late_init(void)
362{
363 struct acpi_table_header *header;
364 acpi_status status;
365 u8 *randomdata;
366 u32 length, i;
367
368 /*
369 * Seed the Linux random number generator with entropy provided by
370 * the Hyper-V host in ACPI table OEM0.
371 */
372 if (!IS_ENABLED(CONFIG_ACPI))
373 return;
374
375 status = acpi_get_table("OEM0", 0, &header);
376 if (ACPI_FAILURE(status) || !header)
377 return;
378
379 /*
380 * Since the "OEM0" table name is for OEM specific usage, verify
381 * that what we're seeing purports to be from Microsoft.
382 */
383 if (strncmp(header->oem_table_id, "MICROSFT", 8))
384 goto error;
385
386 /*
387 * Ensure the length is reasonable. Requiring at least 8 bytes and
388 * no more than 4K bytes is somewhat arbitrary and just protects
389 * against a malformed table. Hyper-V currently provides 64 bytes,
390 * but allow for a change in a later version.
391 */
392 if (header->length < sizeof(*header) + 8 ||
393 header->length > sizeof(*header) + SZ_4K)
394 goto error;
395
396 length = header->length - sizeof(*header);
397 randomdata = (u8 *)(header + 1);
398
399 pr_debug("Hyper-V: Seeding rng with %d random bytes from ACPI table OEM0\n",
400 length);
401
402 add_bootloader_randomness(randomdata, length);
403
404 /*
405 * To prevent the seed data from being visible in /sys/firmware/acpi,
406 * zero out the random data in the ACPI table and fixup the checksum.
407 * The zero'ing is done out of an abundance of caution in avoiding
408 * potential security risks to the rng. Similarly, reset the table
409 * length to just the header size so that a subsequent kexec doesn't
410 * try to use the zero'ed out random data.
411 */
412 for (i = 0; i < length; i++) {
413 header->checksum += randomdata[i];
414 randomdata[i] = 0;
415 }
416
417 for (i = 0; i < sizeof(header->length); i++)
418 header->checksum += ((u8 *)&header->length)[i];
419 header->length = sizeof(*header);
420 for (i = 0; i < sizeof(header->length); i++)
421 header->checksum -= ((u8 *)&header->length)[i];
422
423error:
424 acpi_put_table(header);
425}
426
427/*
428 * Hyper-V specific initialization and die code for
429 * individual CPUs that is common across all architectures.
430 * Called by the CPU hotplug mechanism.
431 */
432
433int hv_common_cpu_init(unsigned int cpu)
434{
435 void **inputarg, **outputarg;
436 u64 msr_vp_index;
437 gfp_t flags;
438 int pgcount = hv_root_partition ? 2 : 1;
439 void *mem;
440 int ret;
441
442 /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
443 flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
444
445 inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
446
447 /*
448 * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
449 * allocated if this CPU was previously online and then taken offline
450 */
451 if (!*inputarg) {
452 mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
453 if (!mem)
454 return -ENOMEM;
455
456 if (hv_root_partition) {
457 outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
458 *outputarg = (char *)mem + HV_HYP_PAGE_SIZE;
459 }
460
461 if (!ms_hyperv.paravisor_present &&
462 (hv_isolation_type_snp() || hv_isolation_type_tdx())) {
463 ret = set_memory_decrypted((unsigned long)mem, pgcount);
464 if (ret) {
465 /* It may be unsafe to free 'mem' */
466 return ret;
467 }
468
469 memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE);
470 }
471
472 /*
473 * In a fully enlightened TDX/SNP VM with more than 64 VPs, if
474 * hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() ->
475 * ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to
476 * use hyperv_pcpu_input_arg as the hypercall input page, which
477 * must be a decrypted page in such a VM, but the page is still
478 * encrypted before set_memory_decrypted() returns. Fix this by
479 * setting *inputarg after the above set_memory_decrypted(): if
480 * hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns
481 * HV_STATUS_INVALID_PARAMETER immediately, and the function
482 * hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(),
483 * which may be slightly slower than the hypercall, but still
484 * works correctly in such a VM.
485 */
486 *inputarg = mem;
487 }
488
489 msr_vp_index = hv_get_msr(HV_MSR_VP_INDEX);
490
491 hv_vp_index[cpu] = msr_vp_index;
492
493 if (msr_vp_index > hv_max_vp_index)
494 hv_max_vp_index = msr_vp_index;
495
496 return 0;
497}
498
/*
 * hv_common_cpu_die - architecture neutral per-CPU Hyper-V teardown
 * @cpu: CPU going offline (unused)
 *
 * Always returns 0.
 */
int hv_common_cpu_die(unsigned int cpu)
{
	/*
	 * Deliberately keep the hyperv_pcpu_input_arg and
	 * hyperv_pcpu_output_arg memory when the CPU goes offline: the
	 * Hyper-V vPCI driver may still use hyperv_pcpu_input_arg while
	 * reassigning interrupts during offlining, and that reassignment
	 * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
	 * called this function.
	 *
	 * Should the CPU come back online later, hv_common_cpu_init()
	 * reuses the memory that was originally allocated.
	 */

	return 0;
}
515
516/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
517bool hv_query_ext_cap(u64 cap_query)
518{
519 /*
520 * The address of the 'hv_extended_cap' variable will be used as an
521 * output parameter to the hypercall below and so it should be
522 * compatible with 'virt_to_phys'. Which means, it's address should be
523 * directly mapped. Use 'static' to keep it compatible; stack variables
524 * can be virtually mapped, making them incompatible with
525 * 'virt_to_phys'.
526 * Hypercall input/output addresses should also be 8-byte aligned.
527 */
528 static u64 hv_extended_cap __aligned(8);
529 static bool hv_extended_cap_queried;
530 u64 status;
531
532 /*
533 * Querying extended capabilities is an extended hypercall. Check if the
534 * partition supports extended hypercall, first.
535 */
536 if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
537 return false;
538
539 /* Extended capabilities do not change at runtime. */
540 if (hv_extended_cap_queried)
541 return hv_extended_cap & cap_query;
542
543 status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
544 &hv_extended_cap);
545
546 /*
547 * The query extended capabilities hypercall should not fail under
548 * any normal circumstances. Avoid repeatedly making the hypercall, on
549 * error.
550 */
551 hv_extended_cap_queried = true;
552 if (!hv_result_success(status)) {
553 pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
554 status);
555 return false;
556 }
557
558 return hv_extended_cap & cap_query;
559}
560EXPORT_SYMBOL_GPL(hv_query_ext_cap);
561
562void hv_setup_dma_ops(struct device *dev, bool coherent)
563{
564 arch_setup_dma_ops(dev, coherent);
565}
566EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
567
568bool hv_is_hibernation_supported(void)
569{
570 return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
571}
572EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
573
574/*
575 * Default function to read the Hyper-V reference counter, independent
576 * of whether Hyper-V enlightened clocks/timers are being used. But on
577 * architectures where it is used, Hyper-V enlightenment code in
578 * hyperv_timer.c may override this function.
579 */
580static u64 __hv_read_ref_counter(void)
581{
582 return hv_get_msr(HV_MSR_TIME_REF_COUNT);
583}
584
585u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter;
586EXPORT_SYMBOL_GPL(hv_read_reference_counter);
587
588/* These __weak functions provide default "no-op" behavior and
589 * may be overridden by architecture specific versions. Architectures
590 * for which the default "no-op" behavior is sufficient can leave
591 * them unimplemented and not be cluttered with a bunch of stub
592 * functions in arch-specific code.
593 */
594
595bool __weak hv_is_isolation_supported(void)
596{
597 return false;
598}
599EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
600
601bool __weak hv_isolation_type_snp(void)
602{
603 return false;
604}
605EXPORT_SYMBOL_GPL(hv_isolation_type_snp);
606
607bool __weak hv_isolation_type_tdx(void)
608{
609 return false;
610}
611EXPORT_SYMBOL_GPL(hv_isolation_type_tdx);
612
613void __weak hv_setup_vmbus_handler(void (*handler)(void))
614{
615}
616EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
617
618void __weak hv_remove_vmbus_handler(void)
619{
620}
621EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
622
623void __weak hv_setup_kexec_handler(void (*handler)(void))
624{
625}
626EXPORT_SYMBOL_GPL(hv_setup_kexec_handler);
627
628void __weak hv_remove_kexec_handler(void)
629{
630}
631EXPORT_SYMBOL_GPL(hv_remove_kexec_handler);
632
633void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs))
634{
635}
636EXPORT_SYMBOL_GPL(hv_setup_crash_handler);
637
638void __weak hv_remove_crash_handler(void)
639{
640}
641EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
642
643void __weak hyperv_cleanup(void)
644{
645}
646EXPORT_SYMBOL_GPL(hyperv_cleanup);
647
648u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
649{
650 return HV_STATUS_INVALID_PARAMETER;
651}
652EXPORT_SYMBOL_GPL(hv_ghcb_hypercall);
653
654u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
655{
656 return HV_STATUS_INVALID_PARAMETER;
657}
658EXPORT_SYMBOL_GPL(hv_tdx_hypercall);