v3.1
  1/* include/asm-generic/tlb.h
  2 *
  3 *	Generic TLB shootdown code
  4 *
  5 * Copyright 2001 Red Hat, Inc.
  6 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  7 *
  8 * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  9 *
 10 * This program is free software; you can redistribute it and/or
 11 * modify it under the terms of the GNU General Public License
 12 * as published by the Free Software Foundation; either version
 13 * 2 of the License, or (at your option) any later version.
 14 */
 15#ifndef _ASM_GENERIC__TLB_H
 16#define _ASM_GENERIC__TLB_H
 17
 18#include <linux/swap.h>
 19#include <asm/pgalloc.h>
 20#include <asm/tlbflush.h>
 21
 22#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 23/*
 24 * Semi RCU freeing of the page directories.
 25 *
 26 * This is needed by some architectures to implement software pagetable walkers.
 27 *
 28 * gup_fast() and other software pagetable walkers do a lockless page-table
  29 * walk and therefore need some synchronization with the freeing of the page
 30 * directories. The chosen means to accomplish that is by disabling IRQs over
 31 * the walk.
 32 *
 33 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 34 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 35 * IRQs delays the completion of the TLB flush we can never observe an already
 36 * freed page.
 37 *
 38 * Architectures that do not have this (PPC) need to delay the freeing by some
 39 * other means, this is that means.
 40 *
 41 * What we do is batch the freed directory pages (tables) and RCU free them.
 42 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 43 * holds off grace periods.
 44 *
 45 * However, in order to batch these pages we need to allocate storage, this
 46 * allocation is deep inside the MM code and can thus easily fail on memory
 47 * pressure. To guarantee progress we fall back to single table freeing, see
 48 * the implementation of tlb_remove_table_one().
 49 *
 50 */
 51struct mmu_table_batch {
 52	struct rcu_head		rcu;
 53	unsigned int		nr;
 54	void			*tables[0];
 55};
 56
 57#define MAX_TABLE_BATCH		\
 58	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
 59
 60extern void tlb_table_flush(struct mmu_gather *tlb);
 61extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 62
 63#endif
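/*
 * Illustrative sketch, not part of the original header: with
 * CONFIG_HAVE_RCU_TABLE_FREE an architecture's page-table free hook
 * would typically hand the table page to tlb_remove_table() rather than
 * freeing it directly, so lockless walkers such as gup_fast() (which run
 * with IRQs disabled) can never observe a freed table.  The function
 * name below is hypothetical.
 */
#if 0	/* example only, never compiled */
static inline void example_pte_free_tlb(struct mmu_gather *tlb, struct page *page)
{
	pgtable_page_dtor(page);	/* undo pgtable_page_ctor() bookkeeping */
	tlb_remove_table(tlb, page);	/* queue for freeing after an RCU-sched grace period */
}
#endif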
 64
 65/*
 66 * If we can't allocate a page to make a big batch of page pointers
 67 * to work on, then just handle a few from the on-stack structure.
 68 */
 69#define MMU_GATHER_BUNDLE	8
 70
 71struct mmu_gather_batch {
 72	struct mmu_gather_batch	*next;
 73	unsigned int		nr;
 74	unsigned int		max;
 75	struct page		*pages[0];
 76};
 77
 78#define MAX_GATHER_BATCH	\
 79	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
 80
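/*
 * Worked example, not part of the original header: on a 64-bit build
 * with 4 KiB pages, sizeof(struct mmu_gather_batch) is 16 bytes (one
 * pointer plus two unsigned ints), so MAX_GATHER_BATCH works out to
 * (4096 - 16) / 8 = 510 page pointers per dynamically allocated batch.
 */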
 81/* struct mmu_gather is an opaque type used by the mm code for passing around
 82 * any data needed by arch specific code for tlb_remove_page.
 83 */
 84struct mmu_gather {
 85	struct mm_struct	*mm;
 86#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 87	struct mmu_table_batch	*batch;
 88#endif
 89	unsigned int		need_flush : 1,	/* Did free PTEs */
 90				fast_mode  : 1; /* No batching   */
 91
 92	unsigned int		fullmm;
 93
 94	struct mmu_gather_batch *active;
 95	struct mmu_gather_batch	local;
 96	struct page		*__pages[MMU_GATHER_BUNDLE];
 97};
 98
 99#define HAVE_GENERIC_MMU_GATHER
100
101static inline int tlb_fast_mode(struct mmu_gather *tlb)
102{
103#ifdef CONFIG_SMP
104	return tlb->fast_mode;
105#else
106	/*
107	 * For UP we don't need to worry about TLB flush
108	 * and page free order so much..
109	 */
110	return 1;
111#endif
112}
113
114void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
115void tlb_flush_mmu(struct mmu_gather *tlb);
116void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
117int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
118
119/* tlb_remove_page
120 *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
121 *	required.
122 */
123static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
124{
125	if (!__tlb_remove_page(tlb, page))
126		tlb_flush_mmu(tlb);
127}
128
129/**
130 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
131 *
 132 * Record the fact that pte's were really unmapped in ->need_flush, so we can
133 * later optimise away the tlb invalidate.   This helps when userspace is
134 * unmapping already-unmapped pages, which happens quite a lot.
135 */
136#define tlb_remove_tlb_entry(tlb, ptep, address)		\
137	do {							\
138		tlb->need_flush = 1;				\
139		__tlb_remove_tlb_entry(tlb, ptep, address);	\
140	} while (0)
141
142#define pte_free_tlb(tlb, ptep, address)			\
143	do {							\
144		tlb->need_flush = 1;				\
145		__pte_free_tlb(tlb, ptep, address);		\
146	} while (0)
147
148#ifndef __ARCH_HAS_4LEVEL_HACK
149#define pud_free_tlb(tlb, pudp, address)			\
150	do {							\
151		tlb->need_flush = 1;				\
152		__pud_free_tlb(tlb, pudp, address);		\
153	} while (0)
154#endif
155
156#define pmd_free_tlb(tlb, pmdp, address)			\
157	do {							\
158		tlb->need_flush = 1;				\
159		__pmd_free_tlb(tlb, pmdp, address);		\
160	} while (0)
161
162#define tlb_migrate_finish(mm) do {} while (0)
163
164#endif /* _ASM_GENERIC__TLB_H */
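
A minimal sketch of how mm code drives the v3.1 interface above when tearing down a range of user mappings. The function is hypothetical (loosely modelled on zap_pte_range() in mm/memory.c); page-table locking, rmap and dirty/accessed bookkeeping are omitted:

static void example_zap_range(struct mm_struct *mm, pte_t *ptep,
			      unsigned long start, unsigned long end)
{
	struct mmu_gather gather;
	struct mmu_gather *tlb = &gather;
	unsigned long addr;

	tlb_gather_mmu(tlb, mm, false);			/* false: not a full-mm teardown */
	for (addr = start; addr < end; addr += PAGE_SIZE, ptep++) {
		pte_t pteval = *ptep;
		struct page *page;

		if (pte_none(pteval) || !pte_present(pteval))
			continue;
		page = pte_page(pteval);
		pte_clear(mm, addr, ptep);		/* unmap the pte ... */
		tlb_remove_tlb_entry(tlb, ptep, addr);	/* ... and mark that a TLB flush is needed */
		tlb_remove_page(tlb, page);		/* batch the page; flushes when the batch fills up */
	}
	tlb_finish_mmu(tlb, start, end);		/* final TLB flush, then free the batched pages */
}

The batching amortises the cost of the TLB flush over up to MAX_GATHER_BATCH pages per batch; on UP configurations tlb_fast_mode() short-circuits the batching, since flush/free ordering does not matter there.
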
v4.17
  1/* include/asm-generic/tlb.h
  2 *
  3 *	Generic TLB shootdown code
  4 *
  5 * Copyright 2001 Red Hat, Inc.
  6 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  7 *
  8 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
  9 *
 10 * This program is free software; you can redistribute it and/or
 11 * modify it under the terms of the GNU General Public License
 12 * as published by the Free Software Foundation; either version
 13 * 2 of the License, or (at your option) any later version.
 14 */
 15#ifndef _ASM_GENERIC__TLB_H
 16#define _ASM_GENERIC__TLB_H
 17
 18#include <linux/swap.h>
 19#include <asm/pgalloc.h>
 20#include <asm/tlbflush.h>
 21
 22#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 23/*
 24 * Semi RCU freeing of the page directories.
 25 *
 26 * This is needed by some architectures to implement software pagetable walkers.
 27 *
 28 * gup_fast() and other software pagetable walkers do a lockless page-table
  29 * walk and therefore need some synchronization with the freeing of the page
 30 * directories. The chosen means to accomplish that is by disabling IRQs over
 31 * the walk.
 32 *
 33 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 34 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 35 * IRQs delays the completion of the TLB flush we can never observe an already
 36 * freed page.
 37 *
 38 * Architectures that do not have this (PPC) need to delay the freeing by some
 39 * other means, this is that means.
 40 *
 41 * What we do is batch the freed directory pages (tables) and RCU free them.
 42 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 43 * holds off grace periods.
 44 *
 45 * However, in order to batch these pages we need to allocate storage, this
 46 * allocation is deep inside the MM code and can thus easily fail on memory
 47 * pressure. To guarantee progress we fall back to single table freeing, see
 48 * the implementation of tlb_remove_table_one().
 49 *
 50 */
 51struct mmu_table_batch {
 52	struct rcu_head		rcu;
 53	unsigned int		nr;
 54	void			*tables[0];
 55};
 56
 57#define MAX_TABLE_BATCH		\
 58	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
 59
 60extern void tlb_table_flush(struct mmu_gather *tlb);
 61extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 62
 63#endif
 64
 65/*
 66 * If we can't allocate a page to make a big batch of page pointers
 67 * to work on, then just handle a few from the on-stack structure.
 68 */
 69#define MMU_GATHER_BUNDLE	8
 70
 71struct mmu_gather_batch {
 72	struct mmu_gather_batch	*next;
 73	unsigned int		nr;
 74	unsigned int		max;
 75	struct page		*pages[0];
 76};
 77
 78#define MAX_GATHER_BATCH	\
 79	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))
 80
 81/*
 82 * Limit the maximum number of mmu_gather batches to reduce a risk of soft
 83 * lockups for non-preemptible kernels on huge machines when a lot of memory
 84 * is zapped during unmapping.
 85 * 10K pages freed at once should be safe even without a preemption point.
 86 */
 87#define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
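/*
 * Worked example, not part of the original header: on a 64-bit build
 * with 4 KiB pages MAX_GATHER_BATCH works out to (4096 - 16) / 8 = 510,
 * so MAX_GATHER_BATCH_COUNT is 10000 / 510 = 19 batches, i.e. roughly
 * 19 * 510 = 9690 pages queued before a flush is forced, comfortably
 * under the 10K bound mentioned above.
 */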
 88
 89/* struct mmu_gather is an opaque type used by the mm code for passing around
 90 * any data needed by arch specific code for tlb_remove_page.
 91 */
 92struct mmu_gather {
 93	struct mm_struct	*mm;
 94#ifdef CONFIG_HAVE_RCU_TABLE_FREE
 95	struct mmu_table_batch	*batch;
 96#endif
 97	unsigned long		start;
 98	unsigned long		end;
 99	/* we are in the middle of an operation to clear
100	 * a full mm and can make some optimizations */
101	unsigned int		fullmm : 1,
102	/* we have performed an operation which
103	 * requires a complete flush of the tlb */
104				need_flush_all : 1;
105
106	struct mmu_gather_batch *active;
107	struct mmu_gather_batch	local;
108	struct page		*__pages[MMU_GATHER_BUNDLE];
109	unsigned int		batch_count;
110	int page_size;
111};
112
113#define HAVE_GENERIC_MMU_GATHER
114
115void arch_tlb_gather_mmu(struct mmu_gather *tlb,
116	struct mm_struct *mm, unsigned long start, unsigned long end);
117void tlb_flush_mmu(struct mmu_gather *tlb);
118void arch_tlb_finish_mmu(struct mmu_gather *tlb,
119			 unsigned long start, unsigned long end, bool force);
120extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
121				   int page_size);
122
123static inline void __tlb_adjust_range(struct mmu_gather *tlb,
124				      unsigned long address,
125				      unsigned int range_size)
126{
127	tlb->start = min(tlb->start, address);
128	tlb->end = max(tlb->end, address + range_size);
129}
130
131static inline void __tlb_reset_range(struct mmu_gather *tlb)
132{
133	if (tlb->fullmm) {
134		tlb->start = tlb->end = ~0;
135	} else {
136		tlb->start = TASK_SIZE;
137		tlb->end = 0;
138	}
139}
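/*
 * Worked example, not part of the original header: for a non-fullmm
 * gather __tlb_reset_range() leaves start = TASK_SIZE and end = 0, an
 * empty range.  Zapping a pte at 0x1000 and then one at 0x5000 via
 * __tlb_adjust_range(tlb, addr, PAGE_SIZE) (4 KiB pages assumed) pulls
 * start down to 0x1000 and pushes end up to 0x6000, so the eventual
 * tlb_flush() only has to cover [0x1000, 0x6000) instead of the whole
 * address space.
 */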
140
141static inline void tlb_remove_page_size(struct mmu_gather *tlb,
142					struct page *page, int page_size)
143{
144	if (__tlb_remove_page_size(tlb, page, page_size))
145		tlb_flush_mmu(tlb);
146}
147
148static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
149{
150	return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
151}
152
153/* tlb_remove_page
154 *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
155 *	required.
156 */
157static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
158{
159	return tlb_remove_page_size(tlb, page, PAGE_SIZE);
160}
161
162#ifndef tlb_remove_check_page_size_change
163#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
164static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
165						     unsigned int page_size)
166{
167	/*
168	 * We don't care about page size change, just update
169	 * mmu_gather page size here so that debug checks
 170	 * don't throw false warnings.
171	 */
172#ifdef CONFIG_DEBUG_VM
173	tlb->page_size = page_size;
174#endif
175}
176#endif
177
178/*
179 * In the case of tlb vma handling, we can optimise these away in the
180 * case where we're doing a full MM flush.  When we're doing a munmap,
181 * the vmas are adjusted to only cover the region to be torn down.
182 */
183#ifndef tlb_start_vma
184#define tlb_start_vma(tlb, vma) do { } while (0)
185#endif
186
187#define __tlb_end_vma(tlb, vma)					\
188	do {							\
189		if (!tlb->fullmm && tlb->end) {			\
190			tlb_flush(tlb);				\
191			__tlb_reset_range(tlb);			\
192		}						\
193	} while (0)
194
195#ifndef tlb_end_vma
196#define tlb_end_vma	__tlb_end_vma
197#endif
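/*
 * Illustrative sketch, not part of the original header: an architecture
 * that wants per-VMA cache/TLB maintenance can define these hooks in
 * its own asm/tlb.h before including this file, roughly along the lines
 * of:
 *
 *	#define tlb_start_vma(tlb, vma)					\
 *		flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end)
 *	#define tlb_end_vma(tlb, vma)					\
 *		flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end)
 *
 * The generic __tlb_end_vma() above instead flushes only the gathered
 * range, and only when it is non-empty and this is not a full-mm
 * teardown.
 */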
198
199#ifndef __tlb_remove_tlb_entry
200#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
201#endif
202
203/**
204 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
205 *
206 * Record the fact that pte's were really unmapped by updating the range,
207 * so we can later optimise away the tlb invalidate.   This helps when
208 * userspace is unmapping already-unmapped pages, which happens quite a lot.
209 */
210#define tlb_remove_tlb_entry(tlb, ptep, address)		\
211	do {							\
212		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
213		__tlb_remove_tlb_entry(tlb, ptep, address);	\
214	} while (0)
215
216#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	     \
217	do {							     \
218		__tlb_adjust_range(tlb, address, huge_page_size(h)); \
219		__tlb_remove_tlb_entry(tlb, ptep, address);	     \
220	} while (0)
221
222/**
223 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
224 * This is a nop so far, because only x86 needs it.
225 */
226#ifndef __tlb_remove_pmd_tlb_entry
227#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
228#endif
229
230#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)			\
231	do {								\
232		__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);	\
233		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
234	} while (0)
235
236/**
237 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
238 * invalidation. This is a nop so far, because only x86 needs it.
239 */
240#ifndef __tlb_remove_pud_tlb_entry
241#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
242#endif
243
244#define tlb_remove_pud_tlb_entry(tlb, pudp, address)			\
245	do {								\
246		__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);	\
247		__tlb_remove_pud_tlb_entry(tlb, pudp, address);		\
248	} while (0)
249
250/*
251 * For things like page tables caches (ie caching addresses "inside" the
252 * page tables, like x86 does), for legacy reasons, flushing an
253 * individual page had better flush the page table caches behind it. This
254 * is definitely how x86 works, for example. And if you have an
255 * architected non-legacy page table cache (which I'm not aware of
256 * anybody actually doing), you're going to have some architecturally
257 * explicit flushing for that, likely *separate* from a regular TLB entry
258 * flush, and thus you'd need more than just some range expansion..
259 *
260 * So if we ever find an architecture
261 * that would want something that odd, I think it is up to that
262 * architecture to do its own odd thing, not cause pain for others
263 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
264 *
265 * For now w.r.t page table cache, mark the range_size as PAGE_SIZE
266 */
267
268#define pte_free_tlb(tlb, ptep, address)			\
269	do {							\
270		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
271		__pte_free_tlb(tlb, ptep, address);		\
272	} while (0)
273
274#define pmd_free_tlb(tlb, pmdp, address)			\
275	do {							\
276		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
277		__pmd_free_tlb(tlb, pmdp, address);		\
278	} while (0)
279
280#ifndef __ARCH_HAS_4LEVEL_HACK
281#define pud_free_tlb(tlb, pudp, address)			\
282	do {							\
283		__tlb_adjust_range(tlb, address, PAGE_SIZE);	\
284		__pud_free_tlb(tlb, pudp, address);		\
285	} while (0)
286#endif
287
288#ifndef __ARCH_HAS_5LEVEL_HACK
289#define p4d_free_tlb(tlb, pudp, address)			\
290	do {							\
291		__tlb_adjust_range(tlb, address, PAGE_SIZE);		\
292		__p4d_free_tlb(tlb, pudp, address);		\
293	} while (0)
294#endif
295
296#define tlb_migrate_finish(mm) do {} while (0)
297
298#endif /* _ASM_GENERIC__TLB_H */
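
By v4.17 the gather tracks an explicit start/end range instead of a single need_flush bit: tlb_remove_tlb_entry() and the *_free_tlb() macros call __tlb_adjust_range(), so the final flush can be a ranged one, and MAX_GATHER_BATCH_COUNT caps how much work is queued between flushes. A minimal sketch of a transparent-huge-page zap using the macros above; the function is hypothetical (loosely modelled on zap_huge_pmd() in mm/huge_memory.c), HPAGE_PMD_SIZE assumes CONFIG_TRANSPARENT_HUGEPAGE, and locking, rmap and accounting are omitted:

static void example_zap_huge_pmd(struct mmu_gather *tlb, pmd_t *pmdp,
				 unsigned long addr)
{
	struct page *page = pmd_page(*pmdp);

	tlb_remove_check_page_size_change(tlb, HPAGE_PMD_SIZE);	/* keep the debug page-size check happy */
	pmd_clear(pmdp);					/* unmap the huge pmd ... */
	tlb_remove_pmd_tlb_entry(tlb, pmdp, addr);		/* ... and widen the flush range by HPAGE_PMD_SIZE */
	tlb_remove_page_size(tlb, page, HPAGE_PMD_SIZE);	/* batch the huge page for freeing */
}
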