Loading...
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | // SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/export.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/percpu.h> #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/smp.h> #include <linux/topology.h> #include <linux/pfn.h> #include <linux/stackprotector.h> #include <asm/sections.h> #include <asm/processor.h> #include <asm/desc.h> #include <asm/setup.h> #include <asm/mpspec.h> #include <asm/apicdef.h> #include <asm/highmem.h> #include <asm/proto.h> #include <asm/cpumask.h> #include <asm/cpu.h> #ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) #else #define BOOT_PERCPU_OFFSET 0 #endif DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; EXPORT_PER_CPU_SYMBOL(this_cpu_off); unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, }; EXPORT_SYMBOL(__per_cpu_offset); /* * On x86_64 symbols referenced from code should be reachable using * 32bit relocations. Reserve space for static percpu variables in * modules so that they are always served from the first chunk which * is located at the percpu segment base. On x86_32, anything can * address anywhere. No need to reserve space in the first chunk. */ #ifdef CONFIG_X86_64 #define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE #else #define PERCPU_FIRST_CHUNK_RESERVE 0 #endif #ifdef CONFIG_X86_32 /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * * If NUMA is not configured or there is only one NUMA node available, * there is no reason to consider NUMA. This function determines * whether percpu allocation should consider NUMA or not. * * RETURNS: * true if NUMA should be considered; otherwise, false. */ static bool __init pcpu_need_numa(void) { #ifdef CONFIG_NUMA pg_data_t *last = NULL; unsigned int cpu; for_each_possible_cpu(cpu) { int node = early_cpu_to_node(cpu); if (node_online(node) && NODE_DATA(node) && last && last != NODE_DATA(node)) return true; last = NODE_DATA(node); } #endif return false; } #endif static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { #ifdef CONFIG_NUMA if (early_cpu_to_node(from) == early_cpu_to_node(to)) return LOCAL_DISTANCE; else return REMOTE_DISTANCE; #else return LOCAL_DISTANCE; #endif } static int __init pcpu_cpu_to_node(int cpu) { return early_cpu_to_node(cpu); } void __init pcpu_populate_pte(unsigned long addr) { populate_extra_pte(addr); } static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 struct desc_struct d = GDT_ENTRY_INIT(DESC_DATA32, per_cpu_offset(cpu), 0xFFFFF); write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PERCPU, &d, DESCTYPE_S); #endif } void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%u\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); /* * Allocate percpu area. Embedding allocator is our favorite; * however, on NUMA configurations, it can result in very * sparse unit mapping and vmalloc area isn't spacious enough * on 32bit. Use page in that case. */ #ifdef CONFIG_X86_32 if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) pcpu_chosen_fc = PCPU_FC_PAGE; #endif rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_PAGE) { const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; size_t atom_size; /* * On 64bit, use PMD_SIZE for atom_size so that embedded * percpu areas are aligned to PMD. This, in the future, * can also allow using PMD mappings in vmalloc area. Use * PAGE_SIZE on 32bit as vmalloc space is highly contended * and large vmalloc area allocs can easily fail. */ #ifdef CONFIG_X86_64 atom_size = PMD_SIZE; #else atom_size = PAGE_SIZE; #endif rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, atom_size, pcpu_cpu_distance, pcpu_cpu_to_node); if (rc < 0) pr_warn("%s allocator failed (%d), falling back to page size\n", pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(pcpu_hot.cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the * initial arrays to the per cpu data areas. These * arrays then become expendable and the *_early_ptr's * are zeroed indicating that the static arrays are * gone. */ #ifdef CONFIG_X86_LOCAL_APIC per_cpu(x86_cpu_to_apicid, cpu) = early_per_cpu_map(x86_cpu_to_apicid, cpu); per_cpu(x86_cpu_to_acpiid, cpu) = early_per_cpu_map(x86_cpu_to_acpiid, cpu); #endif #ifdef CONFIG_NUMA per_cpu(x86_cpu_to_node_map, cpu) = early_per_cpu_map(x86_cpu_to_node_map, cpu); /* * Ensure that the boot cpu numa_node is correct when the boot * cpu is on a node that doesn't have memory installed. * Also cpu_up() will call cpu_to_node() for APs when * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set * up later with c_init aka intel_init/amd_init. * So set them all (boot cpu and all APs). */ set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); #endif /* * Up to this point, the boot CPU has been using .init.data * area. Reload any changed state for the boot CPU. */ if (!cpu) switch_gdt_and_percpu_base(cpu); } /* indicate the early static arrays will soon be gone */ #ifdef CONFIG_X86_LOCAL_APIC early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; early_per_cpu_ptr(x86_cpu_to_acpiid) = NULL; #endif #ifdef CONFIG_NUMA early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; #endif /* Setup node to cpumask map */ setup_node_to_cpumask_map(); /* Setup cpu initialized, callin, callout masks */ setup_cpu_local_masks(); /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in * the smpboot asm and arch_sync_kernel_mappings() doesn't sync to * swapper_pg_dir on 32-bit. The per-cpu mappings need to be available * there too. * * FIXME: Can the later sync in setup_cpu_entry_areas() replace * this call? */ sync_initial_page_table(); } |