mmu_context.h - arch/x86/include/asm/mmu_context.h - Linux diff v4.6

 
  1#ifndef _ASM_X86_MMU_CONTEXT_H
  2#define _ASM_X86_MMU_CONTEXT_H
  3
  4#include <asm/desc.h>
  5#include <linux/atomic.h>
  6#include <linux/mm_types.h>
 
  7
  8#include <trace/events/tlb.h>
  9
 10#include <asm/pgalloc.h>
 11#include <asm/tlbflush.h>
 12#include <asm/paravirt.h>
 13#include <asm/mpx.h>
 14#ifndef CONFIG_PARAVIRT
 15static inline void paravirt_activate_mm(struct mm_struct *prev,
 16					struct mm_struct *next)
 17{
 18}
 19#endif	/* !CONFIG_PARAVIRT */
 20
 21#ifdef CONFIG_PERF_EVENTS
 22extern struct static_key rdpmc_always_available;
 23
 24static inline void load_mm_cr4(struct mm_struct *mm)
 25{
 26	if (static_key_false(&rdpmc_always_available) ||
 27	    atomic_read(&mm->context.perf_rdpmc_allowed))
 28		cr4_set_bits(X86_CR4_PCE);
 29	else
 30		cr4_clear_bits(X86_CR4_PCE);
 31}
 32#else
 33static inline void load_mm_cr4(struct mm_struct *mm) {}
 34#endif
 35
 36#ifdef CONFIG_MODIFY_LDT_SYSCALL
 37/*
 38 * ldt_structs can be allocated, used, and freed, but they are never
 39 * modified while live.
 40 */
 41struct ldt_struct {
 42	/*
 43	 * Xen requires page-aligned LDTs with special permissions.  This is
 44	 * needed to prevent us from installing evil descriptors such as
 45	 * call gates.  On native, we could merge the ldt_struct and LDT
 46	 * allocations, but it's not worth trying to optimize.
 47	 */
 48	struct desc_struct *entries;
 49	int size;
 
 
 
 
 
 
 
 
 
 
 
 50};
 51
 52/*
 53 * Used for LDT copy/destruction.
 54 */
 55int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
 
 
 
 
 
 56void destroy_context_ldt(struct mm_struct *mm);
 
 57#else	/* CONFIG_MODIFY_LDT_SYSCALL */
 58static inline int init_new_context_ldt(struct task_struct *tsk,
 59				       struct mm_struct *mm)
 
 60{
 61	return 0;
 62}
 63static inline void destroy_context_ldt(struct mm_struct *mm) {}
 
 64#endif
 65
 
 
 
 
 66static inline void load_mm_ldt(struct mm_struct *mm)
 67{
 68#ifdef CONFIG_MODIFY_LDT_SYSCALL
 69	struct ldt_struct *ldt;
 
 
 
 
 
 70
 71	/* lockless_dereference synchronizes with smp_store_release */
 72	ldt = lockless_dereference(mm->context.ldt);
 
 
 
 73
 74	/*
 75	 * Any change to mm->context.ldt is followed by an IPI to all
 76	 * CPUs with the mm active.  The LDT will not be freed until
 77	 * after the IPI is handled by all such CPUs.  This means that,
 78	 * if the ldt_struct changes before we return, the values we see
 79	 * will be safe, and the new values will be loaded before we run
 80	 * any user code.
 81	 *
 82	 * NB: don't try to convert this to use RCU without extreme care.
 83	 * We would still need IRQs off, because we don't want to change
 84	 * the local LDT after an IPI loaded a newer value than the one
 85	 * that we can see.
 86	 */
 87
 88	if (unlikely(ldt))
 89		set_ldt(ldt->entries, ldt->size);
 90	else
 91		clear_LDT();
 92#else
 93	clear_LDT();
 94#endif
 95
 96	DEBUG_LOCKS_WARN_ON(preemptible());
 
 
 97}
 98
 99static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 
100{
101#ifdef CONFIG_SMP
102	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
103		this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
104#endif
105}
 
106
/*
 * Initialise the architecture-specific context of a new mm.
 *
 * The only work done here is setting up the LDT state through
 * init_new_context_ldt().  Its result is passed back to the caller
 * instead of being dropped, so a failure to set up the LDT is reported
 * rather than silently turned into success.
 *
 * Returns 0 on success, or the error from init_new_context_ldt().
 */
static inline int init_new_context(struct task_struct *tsk,
				   struct mm_struct *mm)
{
	return init_new_context_ldt(tsk, mm);
}
/* Tear down the arch context of @mm; only the LDT state needs releasing. */
static inline void destroy_context(struct mm_struct *mm)
{
	destroy_context_ldt(mm);
}
117
118static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
119			     struct task_struct *tsk)
120{
121	unsigned cpu = smp_processor_id();
122
123	if (likely(prev != next)) {
124#ifdef CONFIG_SMP
125		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
126		this_cpu_write(cpu_tlbstate.active_mm, next);
127#endif
128		cpumask_set_cpu(cpu, mm_cpumask(next));
129
130		/*
131		 * Re-load page tables.
132		 *
133		 * This logic has an ordering constraint:
134		 *
135		 *  CPU 0: Write to a PTE for 'next'
136		 *  CPU 0: load bit 1 in mm_cpumask.  if nonzero, send IPI.
137		 *  CPU 1: set bit 1 in next's mm_cpumask
138		 *  CPU 1: load from the PTE that CPU 0 writes (implicit)
139		 *
140		 * We need to prevent an outcome in which CPU 1 observes
141		 * the new PTE value and CPU 0 observes bit 1 clear in
142		 * mm_cpumask.  (If that occurs, then the IPI will never
143		 * be sent, and CPU 0's TLB will contain a stale entry.)
144		 *
145		 * The bad outcome can occur if either CPU's load is
146		 * reordered before that CPU's store, so both CPUs must
147		 * execute full barriers to prevent this from happening.
148		 *
149		 * Thus, switch_mm needs a full barrier between the
150		 * store to mm_cpumask and any operation that could load
151		 * from next->pgd.  TLB fills are special and can happen
152		 * due to instruction fetches or for no reason at all,
153		 * and neither LOCK nor MFENCE orders them.
154		 * Fortunately, load_cr3() is serializing and gives the
155		 * ordering guarantee we need.
156		 *
157		 */
158		load_cr3(next->pgd);
159
160		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 
161
162		/* Stop flush ipis for the previous mm */
163		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 
 
 
 
 
 
 
164
165		/* Load per-mm CR4 state */
166		load_mm_cr4(next);
167
168#ifdef CONFIG_MODIFY_LDT_SYSCALL
169		/*
170		 * Load the LDT, if the LDT is different.
171		 *
172		 * It's possible that prev->context.ldt doesn't match
173		 * the LDT register.  This can happen if leave_mm(prev)
174		 * was called and then modify_ldt changed
175		 * prev->context.ldt but suppressed an IPI to this CPU.
176		 * In this case, prev->context.ldt != NULL, because we
177		 * never set context.ldt to NULL while the mm still
178		 * exists.  That means that next->context.ldt !=
179		 * prev->context.ldt, because mms never share an LDT.
180		 */
181		if (unlikely(prev->context.ldt != next->context.ldt))
182			load_mm_ldt(next);
183#endif
184	}
185#ifdef CONFIG_SMP
186	  else {
187		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
188		BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
189
190		if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
191			/*
192			 * On established mms, the mm_cpumask is only changed
193			 * from irq context, from ptep_clear_flush() while in
194			 * lazy tlb mode, and here. Irqs are blocked during
195			 * schedule, protecting us from simultaneous changes.
196			 */
197			cpumask_set_cpu(cpu, mm_cpumask(next));
198
199			/*
200			 * We were in lazy tlb mode and leave_mm disabled
201			 * tlb flush IPI delivery. We must reload CR3
202			 * to make sure to use no freed page tables.
203			 *
204			 * As above, load_cr3() is serializing and orders TLB
205			 * fills with respect to the mm_cpumask write.
206			 */
207			load_cr3(next->pgd);
208			trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
209			load_mm_cr4(next);
210			load_mm_ldt(next);
211		}
212	}
213#endif
 
 
 
214}
215
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * activate_mm(prev, next) - make @next the active address space.
 *
 * Calls paravirt_activate_mm(prev, next) and then
 * switch_mm(prev, next, NULL).
 *
 * The expansion deliberately has no trailing semicolon after
 * "while (0)": the caller's own ';' completes the statement, which keeps
 * the macro usable as the body of an if/else.
 */
#define activate_mm(prev, next)			\
do {						\
	paravirt_activate_mm((prev), (next));	\
	switch_mm((prev), (next), NULL);	\
} while (0)
221
/*
 * deactivate_mm(tsk, mm) - neither argument is evaluated.
 *
 * 32-bit: lazy_load_gs(0).
 * 64-bit: load_gs_index(0) followed by loadsegment(fs, 0).
 */
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)	do { lazy_load_gs(0); } while (0)
#else
#define deactivate_mm(tsk, mm)	\
	do { load_gs_index(0); loadsegment(fs, 0); } while (0)
#endif
234
/* Arch hook for duplicating @oldmm into @mm; defers to the paravirt layer. */
static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	paravirt_arch_dup_mmap(oldmm, mm);
}
240
/* Arch hook for tearing down @mm's mappings; defers to the paravirt layer. */
static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
}
245
#ifdef CONFIG_X86_64
/*
 * True unless IA32 emulation is configured and @mm's
 * context.ia32_compat equals TIF_IA32.
 */
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	if (!config_enabled(CONFIG_IA32_EMULATION))
		return true;

	return mm->context.ia32_compat != TIF_IA32;
}
#else
/* Without CONFIG_X86_64 no mm is ever 64-bit. */
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif
258
/* Arch hook: initialise the MPX state of @mm.  @vma is not used. */
static inline void arch_bprm_mm_init(struct mm_struct *mm, struct vm_area_struct *vma)
{
	mpx_mm_init(mm);
}
264
265static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
266			      unsigned long start, unsigned long end)
267{
268	/*
269	 * mpx_notify_unmap() goes and reads a rarely-hot
270	 * cacheline in the mm_struct.  That can be expensive
271	 * enough to be seen in profiles.
272	 *
273	 * The mpx_notify_unmap() call and its contents have been
274	 * observed to affect munmap() performance on hardware
275	 * where MPX is not present.
276	 *
277	 * The unlikely() optimizes for the fast case: no MPX
278	 * in the CPU, or no MPX use in the process.  Even if
279	 * we get this wrong (in the unlikely event that MPX
280	 * is widely enabled on some system) the overhead of
281	 * MPX itself (reading bounds tables) is expected to
282	 * overwhelm the overhead of getting this unlikely()
283	 * consistently wrong.
284	 */
285	if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
286		mpx_notify_unmap(mm, vma, start, end);
287}
288
289static inline int vma_pkey(struct vm_area_struct *vma)
290{
291	u16 pkey = 0;
292#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
293	unsigned long vma_pkey_mask = VM_PKEY_BIT0 | VM_PKEY_BIT1 |
294				      VM_PKEY_BIT2 | VM_PKEY_BIT3;
295	pkey = (vma->vm_flags & vma_pkey_mask) >> VM_PKEY_SHIFT;
296#endif
297	return pkey;
298}
299
300static inline bool __pkru_allows_pkey(u16 pkey, bool write)
301{
302	u32 pkru = read_pkru();
303
304	if (!__pkru_allows_read(pkru, pkey))
305		return false;
306	if (write && !__pkru_allows_write(pkru, pkey))
307		return false;
308
309	return true;
310}
311
312/*
313 * We only want to enforce protection keys on the current process
314 * because we effectively have no access to PKRU for other
315 * processes or any way to tell *which * PKRU in a threaded
316 * process we could use.
317 *
318 * So do not enforce things if the VMA is not from the current
319 * mm, or if we are in a kernel thread.
320 */
321static inline bool vma_is_foreign(struct vm_area_struct *vma)
322{
323	if (!current->mm)
324		return true;
325	/*
326	 * Should PKRU be enforced on the access to this VMA?  If
327	 * the VMA is from another process, then PKRU has no
328	 * relevance and should not be enforced.
329	 */
330	if (current->mm != vma->vm_mm)
331		return true;
332
333	return false;
334}
335
/*
 * Decide whether protection keys permit the described access to @vma.
 *
 * Access is always permitted for instruction fetches (pkeys never
 * affect them) and for VMAs that do not belong to this process, as
 * flagged by @foreign or detected by vma_is_foreign().  Anything else
 * is checked against PKRU using the VMA's pkey.
 */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	if (execute || foreign || vma_is_foreign(vma))
		return true;

	return __pkru_allows_pkey(vma_pkey(vma), write);
}
347
348static inline bool arch_pte_access_permitted(pte_t pte, bool write)
349{
350	return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write);
351}
352
353#endif /* _ASM_X86_MMU_CONTEXT_H */

  1/* SPDX-License-Identifier: GPL-2.0 */
  2#ifndef _ASM_X86_MMU_CONTEXT_H
  3#define _ASM_X86_MMU_CONTEXT_H
  4
  5#include <asm/desc.h>
  6#include <linux/atomic.h>
  7#include <linux/mm_types.h>
  8#include <linux/pkeys.h>
  9
 10#include <trace/events/tlb.h>
 11
 
 12#include <asm/tlbflush.h>
 13#include <asm/paravirt.h>
 14#include <asm/debugreg.h>
 15#include <asm/gsseg.h>
 
 
 
 
 
 16
 17extern atomic64_t last_mm_ctx_id;
 
 18
 19#ifdef CONFIG_PERF_EVENTS
 20DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 21DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 22void cr4_update_pce(void *ignored);
 
 
 
 
 
 
 23#endif
 24
 25#ifdef CONFIG_MODIFY_LDT_SYSCALL
 26/*
 27 * ldt_structs can be allocated, used, and freed, but they are never
 28 * modified while live.
 29 */
 30struct ldt_struct {
 31	/*
 32	 * Xen requires page-aligned LDTs with special permissions.  This is
 33	 * needed to prevent us from installing evil descriptors such as
 34	 * call gates.  On native, we could merge the ldt_struct and LDT
 35	 * allocations, but it's not worth trying to optimize.
 36	 */
 37	struct desc_struct	*entries;
 38	unsigned int		nr_entries;
 39
 40	/*
 41	 * If PTI is in use, then the entries array is not mapped while we're
 42	 * in user mode.  The whole array will be aliased at the addressed
 43	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
 44	 * and map, and enable a new LDT without invalidating the mapping
 45	 * of an older, still-in-use LDT.
 46	 *
 47	 * slot will be -1 if this LDT doesn't have an alias mapping.
 48	 */
 49	int			slot;
 50};
 51
 52/*
 53 * Used for LDT copy/destruction.
 54 */
 55static inline void init_new_context_ldt(struct mm_struct *mm)
 56{
 57	mm->context.ldt = NULL;
 58	init_rwsem(&mm->context.ldt_usr_sem);
 59}
 60int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 61void destroy_context_ldt(struct mm_struct *mm);
 62void ldt_arch_exit_mmap(struct mm_struct *mm);
 63#else	/* CONFIG_MODIFY_LDT_SYSCALL */
 64static inline void init_new_context_ldt(struct mm_struct *mm) { }
 65static inline int ldt_dup_context(struct mm_struct *oldmm,
 66				  struct mm_struct *mm)
 67{
 68	return 0;
 69}
 70static inline void destroy_context_ldt(struct mm_struct *mm) { }
 71static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 72#endif
 73
 74#ifdef CONFIG_MODIFY_LDT_SYSCALL
 75extern void load_mm_ldt(struct mm_struct *mm);
 76extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next);
 77#else
 78static inline void load_mm_ldt(struct mm_struct *mm)
 79{
 80	clear_LDT();
 81}
 82static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 83{
 84	DEBUG_LOCKS_WARN_ON(preemptible());
 85}
 86#endif
 87
 88#ifdef CONFIG_ADDRESS_MASKING
 89static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
 90{
 91	return mm->context.lam_cr3_mask;
 92}
 93
 94static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
 95{
 96	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
 97	mm->context.untag_mask = oldmm->context.untag_mask;
 98}
 
 
 
 
 
 
 
 
 99
100#define mm_untag_mask mm_untag_mask
101static inline unsigned long mm_untag_mask(struct mm_struct *mm)
102{
103	return mm->context.untag_mask;
104}
 
 
105
106static inline void mm_reset_untag_mask(struct mm_struct *mm)
107{
108	mm->context.untag_mask = -1UL;
109}
110
111#define arch_pgtable_dma_compat arch_pgtable_dma_compat
112static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
113{
114	return !mm_lam_cr3_mask(mm) ||
115		test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags);
 
 
116}
117#else
118
/*
 * Stubs used when CONFIG_ADDRESS_MASKING is off: the CR3 mask is always
 * zero and there is no masking state to copy or reset.
 */
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
	return 0UL;
}

static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { }

static inline void mm_reset_untag_mask(struct mm_struct *mm) { }
 
 
 
 
 
131#endif
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
133#define enter_lazy_tlb enter_lazy_tlb
134extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
135
136/*
137 * Init a new mm.  Used on mm copies, like at fork()
138 * and on mm's that are brand-new, like at execve().
139 */
140#define init_new_context init_new_context
141static inline int init_new_context(struct task_struct *tsk,
142				   struct mm_struct *mm)
143{
144	mutex_init(&mm->context.lock);
145
146	mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
147	atomic64_set(&mm->context.tlb_gen, 0);
148
149#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
150	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
151		/* pkey 0 is the default and allocated implicitly */
152		mm->context.pkey_allocation_map = 0x1;
153		/* -1 means unallocated or invalid */
154		mm->context.execute_only_pkey = -1;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155	}
156#endif
157	mm_reset_untag_mask(mm);
158	init_new_context_ldt(mm);
159	return 0;
160}
161
#define destroy_context destroy_context
/* Tear down the arch context of @mm; only the LDT state needs releasing. */
static inline void destroy_context(struct mm_struct *mm)
{
	destroy_context_ldt(mm);
}
167
168extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
169		      struct task_struct *tsk);
170
171extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
172			       struct task_struct *tsk);
173#define switch_mm_irqs_off switch_mm_irqs_off
174
/*
 * activate_mm(prev, next) - make @next the active address space.
 *
 * Calls paravirt_enter_mmap(next) and then switch_mm(prev, next, NULL).
 *
 * The expansion deliberately has no trailing semicolon after
 * "while (0)": the caller's own ';' completes the statement, which keeps
 * the macro usable as the body of an if/else.  Every use of an argument
 * is parenthesized.
 */
#define activate_mm(prev, next)			\
do {						\
	paravirt_enter_mmap((next));		\
	switch_mm((prev), (next), NULL);	\
} while (0)
180
/*
 * deactivate_mm(tsk, mm) - @mm is never evaluated.
 *
 * 32-bit: loadsegment(gs, 0).
 * 64-bit: shstk_free(tsk), then load_gs_index(0), then
 *         loadsegment(fs, 0).
 */
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm)	do { loadsegment(gs, 0); } while (0)
#else
#define deactivate_mm(tsk, mm)	\
	do { shstk_free(tsk); load_gs_index(0); loadsegment(fs, 0); } while (0)
#endif
194
/*
 * Copy the protection-key bookkeeping (allocation map and execute-only
 * pkey) from @oldmm to @mm.  Does nothing unless pkeys are configured
 * and X86_FEATURE_OSPKE is enabled.
 */
static inline void arch_dup_pkeys(struct mm_struct *oldmm,
				  struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
	if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
		mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;
		mm->context.execute_only_pkey   = oldmm->context.execute_only_pkey;
	}
#endif
}
207
/*
 * Arch hook for duplicating @oldmm into @mm: copies the pkey state,
 * notifies the paravirt layer, copies the address-masking state and
 * finally duplicates the LDT.
 *
 * Returns the result of ldt_dup_context().
 */
static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	int ret;

	arch_dup_pkeys(oldmm, mm);
	paravirt_enter_mmap(mm);
	dup_lam(oldmm, mm);

	ret = ldt_dup_context(oldmm, mm);
	return ret;
}
215
/*
 * Arch hook for tearing down @mm's mappings: notifies the paravirt
 * layer first, then runs the LDT exit hook.
 */
static inline void arch_exit_mmap(struct mm_struct *mm)
{
	paravirt_arch_exit_mmap(mm);
	ldt_arch_exit_mmap(mm);
}
221
#ifdef CONFIG_X86_64
/*
 * True unless IA32 emulation is configured and @mm has the
 * MM_CONTEXT_UPROBE_IA32 context flag set.
 */
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	if (!IS_ENABLED(CONFIG_IA32_EMULATION))
		return true;

	return !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags);
}
#else
/* Without CONFIG_X86_64 no mm is ever 64-bit. */
static inline bool is_64bit_mm(struct mm_struct *mm)
{
	return false;
}
#endif
234
/* Arch hook for unmapping [@start, @end) in @mm; nothing to do here. */
static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
			      unsigned long end) { }
239
240/*
241 * We only want to enforce protection keys on the current process
242 * because we effectively have no access to PKRU for other
 243 * processes or any way to tell *which* PKRU in a threaded
244 * process we could use.
245 *
246 * So do not enforce things if the VMA is not from the current
247 * mm, or if we are in a kernel thread.
248 */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
/*
 * Decide whether protection keys permit the described access to @vma.
 *
 * Access is always permitted for instruction fetches (pkeys never
 * affect them) and for VMAs that do not belong to this process, as
 * flagged by @foreign or detected by vma_is_foreign().  Anything else
 * is checked against PKRU using the VMA's pkey.
 */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
		bool write, bool execute, bool foreign)
{
	if (execute || foreign || vma_is_foreign(vma))
		return true;

	return __pkru_allows_pkey(vma_pkey(vma), write);
}
260
261unsigned long __get_current_cr3_fast(void);
262
263#include <asm-generic/mmu_context.h>
 
264
265#endif /* _ASM_X86_MMU_CONTEXT_H */