Linux Audio

Check our new training course

Loading...
Note: File does not exist in v5.4.
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2#include <linux/acpi.h>
  3#include <linux/cpu.h>
  4#include <linux/delay.h>
  5#include <linux/io.h>
  6#include <linux/kexec.h>
  7#include <linux/memblock.h>
  8#include <linux/pgtable.h>
  9#include <linux/sched/hotplug.h>
 10#include <asm/apic.h>
 11#include <asm/barrier.h>
 12#include <asm/init.h>
 13#include <asm/intel_pt.h>
 14#include <asm/nmi.h>
 15#include <asm/processor.h>
 16#include <asm/reboot.h>
 17
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;

/*
 * Virtual address of the Multiprocessor Wakeup Structure mailbox.
 * Stays NULL until acpi_wakeup_cpu() maps it on its first call.
 */
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;

/*
 * Physical address of the top-level page table built by
 * acpi_mp_setup_reset(); handed to asm_acpi_mp_play_dead().
 */
static u64 acpi_mp_pgd __ro_after_init;

/*
 * Reset vector recorded by acpi_mp_setup_reset(); handed to
 * asm_acpi_mp_play_dead() together with acpi_mp_pgd.
 */
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
 26
 27static void acpi_mp_stop_this_cpu(void)
 28{
 29	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
 30}
 31
 32static void acpi_mp_play_dead(void)
 33{
 34	play_dead_common();
 35	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
 36}
 37
 38static void acpi_mp_cpu_die(unsigned int cpu)
 39{
 40	u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
 41	unsigned long timeout;
 42
 43	/*
 44	 * Use TEST mailbox command to prove that BIOS got control over
 45	 * the CPU before declaring it dead.
 46	 *
 47	 * BIOS has to clear 'command' field of the mailbox.
 48	 */
 49	acpi_mp_wake_mailbox->apic_id = apicid;
 50	smp_store_release(&acpi_mp_wake_mailbox->command,
 51			  ACPI_MP_WAKE_COMMAND_TEST);
 52
 53	/* Don't wait longer than a second. */
 54	timeout = USEC_PER_SEC;
 55	while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
 56		udelay(1);
 57
 58	if (!timeout)
 59		pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
 60}
 61
 62/* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
 63static void __init *alloc_pgt_page(void *dummy)
 64{
 65	return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 66}
 67
 68static void __init free_pgt_page(void *pgt, void *dummy)
 69{
 70	return memblock_free(pgt, PAGE_SIZE);
 71}
 72
 73/*
 74 * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
 75 * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
 76 * to the identity mapping and the function has be present at the same spot in
 77 * the virtual address space before and after switching page tables.
 78 */
 79static int __init init_transition_pgtable(pgd_t *pgd)
 80{
 81	pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
 82	unsigned long vaddr, paddr;
 83	p4d_t *p4d;
 84	pud_t *pud;
 85	pmd_t *pmd;
 86	pte_t *pte;
 87
 88	vaddr = (unsigned long)asm_acpi_mp_play_dead;
 89	pgd += pgd_index(vaddr);
 90	if (!pgd_present(*pgd)) {
 91		p4d = (p4d_t *)alloc_pgt_page(NULL);
 92		if (!p4d)
 93			return -ENOMEM;
 94		set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
 95	}
 96	p4d = p4d_offset(pgd, vaddr);
 97	if (!p4d_present(*p4d)) {
 98		pud = (pud_t *)alloc_pgt_page(NULL);
 99		if (!pud)
100			return -ENOMEM;
101		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
102	}
103	pud = pud_offset(p4d, vaddr);
104	if (!pud_present(*pud)) {
105		pmd = (pmd_t *)alloc_pgt_page(NULL);
106		if (!pmd)
107			return -ENOMEM;
108		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
109	}
110	pmd = pmd_offset(pud, vaddr);
111	if (!pmd_present(*pmd)) {
112		pte = (pte_t *)alloc_pgt_page(NULL);
113		if (!pte)
114			return -ENOMEM;
115		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
116	}
117	pte = pte_offset_kernel(pmd, vaddr);
118
119	paddr = __pa(vaddr);
120	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
121
122	return 0;
123}
124
125static int __init acpi_mp_setup_reset(u64 reset_vector)
126{
127	struct x86_mapping_info info = {
128		.alloc_pgt_page = alloc_pgt_page,
129		.free_pgt_page	= free_pgt_page,
130		.page_flag      = __PAGE_KERNEL_LARGE_EXEC,
131		.kernpg_flag    = _KERNPG_TABLE_NOENC,
132	};
133	pgd_t *pgd;
134
135	pgd = alloc_pgt_page(NULL);
136	if (!pgd)
137		return -ENOMEM;
138
139	for (int i = 0; i < nr_pfn_mapped; i++) {
140		unsigned long mstart, mend;
141
142		mstart = pfn_mapped[i].start << PAGE_SHIFT;
143		mend   = pfn_mapped[i].end << PAGE_SHIFT;
144		if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
145			kernel_ident_mapping_free(&info, pgd);
146			return -ENOMEM;
147		}
148	}
149
150	if (kernel_ident_mapping_init(&info, pgd,
151				      PAGE_ALIGN_DOWN(reset_vector),
152				      PAGE_ALIGN(reset_vector + 1))) {
153		kernel_ident_mapping_free(&info, pgd);
154		return -ENOMEM;
155	}
156
157	if (init_transition_pgtable(pgd)) {
158		kernel_ident_mapping_free(&info, pgd);
159		return -ENOMEM;
160	}
161
162	smp_ops.play_dead = acpi_mp_play_dead;
163	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
164	smp_ops.cpu_die = acpi_mp_cpu_die;
165
166	acpi_mp_reset_vector_paddr = reset_vector;
167	acpi_mp_pgd = __pa(pgd);
168
169	return 0;
170}
171
172static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
173{
174	if (!acpi_mp_wake_mailbox_paddr) {
175		pr_warn_once("No MADT mailbox: cannot bringup secondary CPUs. Booting with kexec?\n");
176		return -EOPNOTSUPP;
177	}
178
179	/*
180	 * Remap mailbox memory only for the first call to acpi_wakeup_cpu().
181	 *
182	 * Wakeup of secondary CPUs is fully serialized in the core code.
183	 * No need to protect acpi_mp_wake_mailbox from concurrent accesses.
184	 */
185	if (!acpi_mp_wake_mailbox) {
186		acpi_mp_wake_mailbox = memremap(acpi_mp_wake_mailbox_paddr,
187						sizeof(*acpi_mp_wake_mailbox),
188						MEMREMAP_WB);
189	}
190
191	/*
192	 * Mailbox memory is shared between the firmware and OS. Firmware will
193	 * listen on mailbox command address, and once it receives the wakeup
194	 * command, the CPU associated with the given apicid will be booted.
195	 *
196	 * The value of 'apic_id' and 'wakeup_vector' must be visible to the
197	 * firmware before the wakeup command is visible.  smp_store_release()
198	 * ensures ordering and visibility.
199	 */
200	acpi_mp_wake_mailbox->apic_id	    = apicid;
201	acpi_mp_wake_mailbox->wakeup_vector = start_ip;
202	smp_store_release(&acpi_mp_wake_mailbox->command,
203			  ACPI_MP_WAKE_COMMAND_WAKEUP);
204
205	/*
206	 * Wait for the CPU to wake up.
207	 *
208	 * The CPU being woken up is essentially in a spin loop waiting to be
209	 * woken up. It should not take long for it wake up and acknowledge by
210	 * zeroing out ->command.
211	 *
212	 * ACPI specification doesn't provide any guidance on how long kernel
213	 * has to wait for a wake up acknowledgment. It also doesn't provide
214	 * a way to cancel a wake up request if it takes too long.
215	 *
216	 * In TDX environment, the VMM has control over how long it takes to
217	 * wake up secondary. It can postpone scheduling secondary vCPU
218	 * indefinitely. Giving up on wake up request and reporting error opens
219	 * possible attack vector for VMM: it can wake up a secondary CPU when
220	 * kernel doesn't expect it. Wait until positive result of the wake up
221	 * request.
222	 */
223	while (READ_ONCE(acpi_mp_wake_mailbox->command))
224		cpu_relax();
225
226	return 0;
227}
228
229static void acpi_mp_disable_offlining(struct acpi_madt_multiproc_wakeup *mp_wake)
230{
231	cpu_hotplug_disable_offlining();
232
233	/*
234	 * ACPI MADT doesn't allow to offline a CPU after it was onlined. This
235	 * limits kexec: the second kernel won't be able to use more than one CPU.
236	 *
237	 * To prevent a kexec kernel from onlining secondary CPUs invalidate the
238	 * mailbox address in the ACPI MADT wakeup structure which prevents a
239	 * kexec kernel to use it.
240	 *
241	 * This is safe as the booting kernel has the mailbox address cached
242	 * already and acpi_wakeup_cpu() uses the cached value to bring up the
243	 * secondary CPUs.
244	 *
245	 * Note: This is a Linux specific convention and not covered by the
246	 *       ACPI specification.
247	 */
248	mp_wake->mailbox_address = 0;
249}
250
251int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
252			      const unsigned long end)
253{
254	struct acpi_madt_multiproc_wakeup *mp_wake;
255
256	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
257
258	/*
259	 * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
260	 * entry.  'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
261	 * than the actual size of the MP wakeup entry in ACPI table because the
262	 * 'reset_vector' is only available in the V1 MP wakeup structure.
263	 */
264	if (!mp_wake)
265		return -EINVAL;
266	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
267		return -EINVAL;
268	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
269		return -EINVAL;
270
271	acpi_table_print_madt_entry(&header->common);
272
273	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
274
275	if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
276	    mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
277		if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
278			pr_warn("Failed to setup MADT reset vector\n");
279			acpi_mp_disable_offlining(mp_wake);
280		}
281	} else {
282		/*
283		 * CPU offlining requires version 1 of the ACPI MADT wakeup
284		 * structure.
285		 */
286		acpi_mp_disable_offlining(mp_wake);
287	}
288
289	apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
290
291	return 0;
292}