v4.6
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
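For reference, the "base offset calculation" described above is what the pfn/page conversions compile down to when CONFIG_SPARSEMEM_VMEMMAP is enabled; slightly simplified from include/asm-generic/memory_model.h:

	/* memmap is virtually contiguous: pfn <-> page is pure pointer math */
	#define __pfn_to_page(pfn)	(vmemmap + (pfn))
	#define __page_to_pfn(page)	(unsigned long)((page) - vmemmap)

where vmemmap is an architecture-provided base address for the struct page array that this file populates.
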
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __init_refok __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_virt_alloc_try_nid(size, align, goal,
					    BOOTMEM_ALLOC_ACCESSIBLE, node);
}

static void *vmemmap_buf;
static void *vmemmap_buf_end;

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		struct page *page;

		if (node_state(node, N_HIGH_MEMORY))
			page = alloc_pages_node(
				node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
				get_order(size));
		else
			page = alloc_pages(
				GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
				get_order(size));
		if (page)
			return page_address(page);
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

/* need to make sure size is all the same during early stage */
static void * __meminit alloc_block_buf(unsigned long size, int node)
{
	void *ptr;

	if (!vmemmap_buf)
		return vmemmap_alloc_block(size, node);

	/* take the allocation from the preallocated buffer */
	ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
	if (ptr + size > vmemmap_buf_end)
		return vmemmap_alloc_block(size, node);

	vmemmap_buf = ptr + size;

	return ptr;
}

static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

/**
 * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
 * @altmap: reserved page pool for the allocation
 * @nr_pfns: size (in pages) of the allocation
 *
 * Allocations are aligned to the size of the request
 */
static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
		unsigned long nr_pfns)
{
	unsigned long pfn = vmem_altmap_next_pfn(altmap);
	unsigned long nr_align;

	/*
	 * The lowest set bit of nr_pfns is the largest power of two that
	 * divides the request, i.e. its natural alignment; round pfn up
	 * to that boundary.
	 */
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;

	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return ULONG_MAX;
	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	return pfn + nr_align;
}
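A worked example of the alignment rule stated above: find_first_bit() returns the index of the lowest set bit of nr_pfns, so 1UL << find_first_bit(...) is the largest power of two dividing the request. A 512-pfn request is thus aligned to a 512-pfn boundary; a 24-pfn request (24 = 2^3 * 3) is aligned to an 8-pfn boundary. The pfns skipped to reach that boundary are counted in nr_align and charged to altmap->align.
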
static void * __meminit altmap_alloc_block_buf(unsigned long size,
		struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns;
	void *ptr;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	nr_pfns = size >> PAGE_SHIFT;
	pfn = vmem_altmap_alloc(altmap, nr_pfns);
	if (pfn < ULONG_MAX)
		ptr = __va(__pfn_to_phys(pfn));
	else
		ptr = NULL;
	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);

	return ptr;
}

/* need to make sure size is all the same during early stage */
void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
		struct vmem_altmap *altmap)
{
	if (altmap)
		return altmap_alloc_block_buf(size, altmap);
	return alloc_block_buf(size, node);
}

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(*pte);
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(*pte)) {
		pte_t entry;
		void *p = alloc_block_buf(PAGE_SIZE, node);
		if (!p)
			return NULL;
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

int __meminit vmemmap_populate_basepages(unsigned long start,
					 unsigned long end, int node)
{
	unsigned long addr = start;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;
		pud = vmemmap_pud_populate(pgd, addr, node);
		if (!pud)
			return -ENOMEM;
		pmd = vmemmap_pmd_populate(pud, addr, node);
		if (!pmd)
			return -ENOMEM;
		pte = vmemmap_pte_populate(pmd, addr, node);
		if (!pte)
			return -ENOMEM;
		vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
	}

	return 0;
}
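An architecture whose memmap is backed by base pages can supply the required vmemmap_populate() hook as a thin wrapper around the helper above. A minimal sketch, modeled on what several v4.6 architectures do (illustrative, not quoted from any one arch):

	int __meminit vmemmap_populate(unsigned long start, unsigned long end,
				       int node)
	{
		/* back the whole [start, end) memmap range with base pages */
		return vmemmap_populate_basepages(start, end, node);
	}
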
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
	unsigned long start;
	unsigned long end;
	struct page *map;

	map = pfn_to_page(pnum * PAGES_PER_SECTION);
	start = (unsigned long)map;
	end = (unsigned long)(map + PAGES_PER_SECTION);

	if (vmemmap_populate(start, end, nid))
		return NULL;

	return map;
}

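For concreteness: on x86-64, PAGES_PER_SECTION is 32768 (128 MiB sections of 4 KiB pages), so with a 64-byte struct page each section's memmap is 32768 * 64 bytes = 2 MiB. That is why the per-node buffer allocated below is sized and aligned in PMD_SIZE units, letting each section's memmap be carved out PMD-aligned.
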
void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	unsigned long pnum;
	unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
	void *vmemmap_buf_start;

	size = ALIGN(size, PMD_SIZE);
	vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
			 PMD_SIZE, __pa(MAX_DMA_ADDRESS));

	if (vmemmap_buf_start) {
		vmemmap_buf = vmemmap_buf_start;
		vmemmap_buf_end = vmemmap_buf_start + size * map_count;
	}

	for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
		struct mem_section *ms;

		if (!present_section_nr(pnum))
			continue;

		map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
		if (map_map[pnum])
			continue;
		ms = __nr_to_section(pnum);
		pr_err("%s: sparsemem memory map backing failed, some memory will not be available\n",
		       __func__);
		ms->section_mem_map = 0;
	}

	if (vmemmap_buf_start) {
		/* need to free the leftover buf */
		memblock_free_early(__pa(vmemmap_buf),
				    vmemmap_buf_end - vmemmap_buf);
		vmemmap_buf = NULL;
		vmemmap_buf_end = NULL;
	}
}
v6.13.7
// SPDX-License-Identifier: GPL-2.0
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/memblock.h>
#include <linux/memremap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>

#include <asm/dma.h>
#include <asm/pgalloc.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __ref __earlyonly_bootmem_alloc(int node,
				unsigned long size,
				unsigned long align,
				unsigned long goal)
{
	return memblock_alloc_try_nid_raw(size, align, goal,
					       MEMBLOCK_ALLOC_ACCESSIBLE, node);
}

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
	/* If the main allocator is up use that, fallback to bootmem. */
	if (slab_is_available()) {
		gfp_t gfp_mask = GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN;
		int order = get_order(size);
		static bool warned;
		struct page *page;

		page = alloc_pages_node(node, gfp_mask, order);
		if (page)
			return page_address(page);

		if (!warned) {
			warn_alloc(gfp_mask & ~__GFP_NOWARN, NULL,
				   "vmemmap alloc failure: order:%u", order);
			warned = true;
		}
		return NULL;
	} else
		return __earlyonly_bootmem_alloc(node, size, size,
				__pa(MAX_DMA_ADDRESS));
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap);

/* need to make sure size is all the same during early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node,
					 struct vmem_altmap *altmap)
{
	void *ptr;

	if (altmap)
		return altmap_alloc_block_buf(size, altmap);

	ptr = sparse_buffer_alloc(size);
	if (!ptr)
		ptr = vmemmap_alloc_block(size, node);
	return ptr;
}
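A cross-version note: the file-local vmemmap_buf bounce buffer seen in v4.6 above is gone here; early-boot allocations now come from the per-node buffer managed by sparse_buffer_init()/sparse_buffer_alloc() in mm/sparse.c, falling back to vmemmap_alloc_block() only when that buffer is exhausted or absent.
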
static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
{
	return altmap->base_pfn + altmap->reserve + altmap->alloc
		+ altmap->align;
}

static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
{
	unsigned long allocated = altmap->alloc + altmap->align;

	if (altmap->free > allocated)
		return altmap->free - allocated;
	return 0;
}

static void * __meminit altmap_alloc_block_buf(unsigned long size,
					       struct vmem_altmap *altmap)
{
	unsigned long pfn, nr_pfns, nr_align;

	if (size & ~PAGE_MASK) {
		pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
				__func__, size);
		return NULL;
	}

	pfn = vmem_altmap_next_pfn(altmap);
	nr_pfns = size >> PAGE_SHIFT;
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
	nr_align = ALIGN(pfn, nr_align) - pfn;
	if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
		return NULL;

	altmap->alloc += nr_pfns;
	altmap->align += nr_align;
	pfn += nr_align;

	pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
			__func__, pfn, altmap->alloc, altmap->align, nr_pfns);
	return __va(__pfn_to_phys(pfn));
}

void __meminit vmemmap_verify(pte_t *pte, int node,
				unsigned long start, unsigned long end)
{
	unsigned long pfn = pte_pfn(ptep_get(pte));
	int actual_node = early_pfn_to_nid(pfn);

	if (node_distance(actual_node, node) > LOCAL_DISTANCE)
		pr_warn_once("[%lx-%lx] potential offnode page_structs\n",
			start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
				       struct vmem_altmap *altmap,
				       struct page *reuse)
{
	pte_t *pte = pte_offset_kernel(pmd, addr);
	if (pte_none(ptep_get(pte))) {
		pte_t entry;
		void *p;

		if (!reuse) {
			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
			if (!p)
				return NULL;
		} else {
			/*
			 * When a PTE/PMD entry is freed from the init_mm
			 * there's a free_pages() call to this page allocated
			 * above. Thus this get_page() is paired with the
			 * put_page_testzero() on the freeing path.
			 * This can only be called by certain ZONE_DEVICE
			 * paths, and through vmemmap_populate_compound_pages()
			 * when slab is available.
			 */
			get_page(reuse);
			p = page_to_virt(reuse);
		}
		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	}
	return pte;
}

static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
{
	void *p = vmemmap_alloc_block(size, node);

	if (!p)
		return NULL;
	memset(p, 0, size);

	return p;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		kernel_pte_init(p);
		pmd_populate_kernel(&init_mm, pmd, p);
	}
	return pmd;
}

pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
{
	pud_t *pud = pud_offset(p4d, addr);
	if (pud_none(*pud)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pmd_init(p);
		pud_populate(&init_mm, pud, p);
	}
	return pud;
}

p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
{
	p4d_t *p4d = p4d_offset(pgd, addr);
	if (p4d_none(*p4d)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pud_init(p);
		p4d_populate(&init_mm, p4d, p);
	}
	return p4d;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
	pgd_t *pgd = pgd_offset_k(addr);
	if (pgd_none(*pgd)) {
		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
		if (!p)
			return NULL;
		pgd_populate(&init_mm, pgd, p);
	}
	return pgd;
}

static pte_t * __meminit vmemmap_populate_address(unsigned long addr, int node,
					      struct vmem_altmap *altmap,
					      struct page *reuse)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = vmemmap_pgd_populate(addr, node);
	if (!pgd)
		return NULL;
	p4d = vmemmap_p4d_populate(pgd, addr, node);
	if (!p4d)
		return NULL;
	pud = vmemmap_pud_populate(p4d, addr, node);
	if (!pud)
		return NULL;
	pmd = vmemmap_pmd_populate(pud, addr, node);
	if (!pmd)
		return NULL;
	pte = vmemmap_pte_populate(pmd, addr, node, altmap, reuse);
	if (!pte)
		return NULL;
	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);

	return pte;
}

static int __meminit vmemmap_populate_range(unsigned long start,
					    unsigned long end, int node,
					    struct vmem_altmap *altmap,
					    struct page *reuse)
{
	unsigned long addr = start;
	pte_t *pte;

	for (; addr < end; addr += PAGE_SIZE) {
		pte = vmemmap_populate_address(addr, node, altmap, reuse);
		if (!pte)
			return -ENOMEM;
	}

	return 0;
}

int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	return vmemmap_populate_range(start, end, node, altmap, NULL);
}

void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				      unsigned long addr, unsigned long next)
{
}

int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
				       unsigned long addr, unsigned long next)
{
	return 0;
}

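Architectures that can back the memmap with PMD-sized pages override these weak hooks. A hedged sketch of a vmemmap_set_pmd() override, loosely modeled on the x86-64 version (simplified; the real one also tracks contiguous blocks for its debug output):

	void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
				       unsigned long addr, unsigned long next)
	{
		/* map the PMD_SIZE block at p as one huge vmemmap entry */
		pte_t entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL_LARGE);

		set_pmd(pmd, __pmd(pte_val(entry)));
	}
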
int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
					 int node, struct vmem_altmap *altmap)
{
	unsigned long addr;
	unsigned long next;
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	for (addr = start; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);

		pgd = vmemmap_pgd_populate(addr, node);
		if (!pgd)
			return -ENOMEM;

		p4d = vmemmap_p4d_populate(pgd, addr, node);
		if (!p4d)
			return -ENOMEM;

		pud = vmemmap_pud_populate(p4d, addr, node);
		if (!pud)
			return -ENOMEM;

		pmd = pmd_offset(pud, addr);
		if (pmd_none(READ_ONCE(*pmd))) {
			void *p;

			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
			if (p) {
				vmemmap_set_pmd(pmd, p, node, addr, next);
				continue;
			} else if (altmap) {
				/*
				 * No fallback: In any case we care about, the
				 * altmap should be reasonably sized and aligned
				 * such that vmemmap_alloc_block_buf() will always
				 * succeed. For consistency with the PTE case,
				 * return an error here as failure could indicate
				 * a configuration issue with the size of the altmap.
				 */
				return -ENOMEM;
			}
		} else if (vmemmap_check_pmd(pmd, node, addr, next))
			continue;
		if (vmemmap_populate_basepages(addr, next, node, altmap))
			return -ENOMEM;
	}

	return 0;
}

#ifndef vmemmap_populate_compound_pages
/*
 * For compound pages bigger than section size (e.g. x86 1G compound
 * pages with 2M subsection size) fill the rest of sections as tail
 * pages.
 *
 * Note that memremap_pages() resets @nr_range value and will increment
 * it after each successful range onlining. Thus the value of @nr_range
 * at section memmap populate corresponds to the in-progress range
 * being onlined here.
 */
static bool __meminit reuse_compound_section(unsigned long start_pfn,
					     struct dev_pagemap *pgmap)
{
	unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
	unsigned long offset = start_pfn -
		PHYS_PFN(pgmap->ranges[pgmap->nr_range].start);

	return !IS_ALIGNED(offset, nr_pages) && nr_pages > PAGES_PER_SUBSECTION;
}
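To make the comment's example concrete: a 1 GiB device-dax compound page spans pgmap_vmemmap_nr() = 262144 pfns, while PAGES_PER_SUBSECTION is 512 (2 MiB of 4 KiB pages), so every section whose start_pfn sits at a non-262144-pfn-aligned offset into the range lies entirely inside a compound page and qualifies for tail-page reuse.
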
static pte_t * __meminit compound_section_tail_page(unsigned long addr)
{
	pte_t *pte;

	addr -= PAGE_SIZE;

	/*
	 * Assuming sections are populated sequentially, the previous section's
	 * page data can be reused.
	 */
	pte = pte_offset_kernel(pmd_off_k(addr), addr);
	if (!pte)
		return NULL;

	return pte;
}

static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
						     unsigned long start,
						     unsigned long end, int node,
						     struct dev_pagemap *pgmap)
{
	unsigned long size, addr;
	pte_t *pte;
	int rc;

	if (reuse_compound_section(start_pfn, pgmap)) {
		pte = compound_section_tail_page(start);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the page that was populated in the prior iteration
		 * with just tail struct pages.
		 */
		return vmemmap_populate_range(start, end, node, NULL,
					      pte_page(ptep_get(pte)));
	}

	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
	for (addr = start; addr < end; addr += size) {
		unsigned long next, last = addr + size;

		/* Populate the head page vmemmap page */
		pte = vmemmap_populate_address(addr, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/* Populate the tail pages vmemmap page */
		next = addr + PAGE_SIZE;
		pte = vmemmap_populate_address(next, node, NULL, NULL);
		if (!pte)
			return -ENOMEM;

		/*
		 * Reuse the previous page for the rest of tail pages
		 * See layout diagram in Documentation/mm/vmemmap_dedup.rst
		 */
		next += PAGE_SIZE;
		rc = vmemmap_populate_range(next, last, node, NULL,
					    pte_page(ptep_get(pte)));
		if (rc)
			return -ENOMEM;
	}

	return 0;
}

#endif

struct page * __meminit __populate_section_memmap(unsigned long pfn,
		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
		struct dev_pagemap *pgmap)
{
	unsigned long start = (unsigned long) pfn_to_page(pfn);
	unsigned long end = start + nr_pages * sizeof(struct page);
	int r;

	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
		return NULL;

	if (vmemmap_can_optimize(altmap, pgmap))
		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
	else
		r = vmemmap_populate(start, end, nid, altmap);

	if (r < 0)
		return NULL;

	if (system_state == SYSTEM_BOOTING)
		memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));
	else
		memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE));

	return pfn_to_page(pfn);
}