v3.5.6
  1#include <linux/module.h>
  2#include <linux/sched.h>
  3#include <linux/mutex.h>
  4#include <linux/list.h>
  5#include <linux/stringify.h>
  6#include <linux/kprobes.h>
  7#include <linux/mm.h>
  8#include <linux/vmalloc.h>
  9#include <linux/memory.h>
 10#include <linux/stop_machine.h>
 11#include <linux/slab.h>
 12#include <asm/alternative.h>
 13#include <asm/sections.h>
 14#include <asm/pgtable.h>
 15#include <asm/mce.h>
 16#include <asm/nmi.h>
 17#include <asm/cacheflush.h>
 18#include <asm/tlbflush.h>
 19#include <asm/io.h>
 20#include <asm/fixmap.h>
 21
 22#define MAX_PATCH_LEN (255-1)
 23
 24#ifdef CONFIG_HOTPLUG_CPU
 25static int smp_alt_once;
 26
 27static int __init bootonly(char *str)
 28{
 29	smp_alt_once = 1;
 30	return 1;
 31}
 32__setup("smp-alt-boot", bootonly);
 33#else
 34#define smp_alt_once 1
 35#endif
 36
 37static int __initdata_or_module debug_alternative;
 38
 39static int __init debug_alt(char *str)
 40{
 41	debug_alternative = 1;
 42	return 1;
 43}
 44__setup("debug-alternative", debug_alt);
 45
 46static int noreplace_smp;
 47
 48static int __init setup_noreplace_smp(char *str)
 49{
 50	noreplace_smp = 1;
 51	return 1;
 52}
 53__setup("noreplace-smp", setup_noreplace_smp);
 54
 55#ifdef CONFIG_PARAVIRT
 56static int __initdata_or_module noreplace_paravirt = 0;
 57
 58static int __init setup_noreplace_paravirt(char *str)
 59{
 60	noreplace_paravirt = 1;
 61	return 1;
 62}
 63__setup("noreplace-paravirt", setup_noreplace_paravirt);
 64#endif
 65
 66#define DPRINTK(fmt, args...) if (debug_alternative) \
 67	printk(KERN_DEBUG fmt, args)
 68
 69/*
 70 * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
 71 * that correspond to that nop. Getting from one nop to the next, we
 72 * add to the array the offset that is equal to the sum of all sizes of
 73 * nops preceding the one we are after.
 74 *
 75 * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
 76 * nice symmetry of sizes of the previous nops.
 77 */
 78#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
 79static const unsigned char intelnops[] =
 80{
 81	GENERIC_NOP1,
 82	GENERIC_NOP2,
 83	GENERIC_NOP3,
 84	GENERIC_NOP4,
 85	GENERIC_NOP5,
 86	GENERIC_NOP6,
 87	GENERIC_NOP7,
 88	GENERIC_NOP8,
 89	GENERIC_NOP5_ATOMIC
 90};
 91static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
 92{
 93	NULL,
 94	intelnops,
 95	intelnops + 1,
 96	intelnops + 1 + 2,
 97	intelnops + 1 + 2 + 3,
 98	intelnops + 1 + 2 + 3 + 4,
 99	intelnops + 1 + 2 + 3 + 4 + 5,
100	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
101	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
102	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
103};
104#endif
105
106#ifdef K8_NOP1
107static const unsigned char k8nops[] =
108{
109	K8_NOP1,
110	K8_NOP2,
111	K8_NOP3,
112	K8_NOP4,
113	K8_NOP5,
114	K8_NOP6,
115	K8_NOP7,
116	K8_NOP8,
117	K8_NOP5_ATOMIC
118};
119static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
120{
121	NULL,
122	k8nops,
123	k8nops + 1,
124	k8nops + 1 + 2,
125	k8nops + 1 + 2 + 3,
126	k8nops + 1 + 2 + 3 + 4,
127	k8nops + 1 + 2 + 3 + 4 + 5,
128	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
129	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
130	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
131};
132#endif
133
134#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
135static const unsigned char k7nops[] =
136{
137	K7_NOP1,
138	K7_NOP2,
139	K7_NOP3,
140	K7_NOP4,
141	K7_NOP5,
142	K7_NOP6,
143	K7_NOP7,
144	K7_NOP8,
145	K7_NOP5_ATOMIC
146};
147static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
148{
149	NULL,
150	k7nops,
151	k7nops + 1,
152	k7nops + 1 + 2,
153	k7nops + 1 + 2 + 3,
154	k7nops + 1 + 2 + 3 + 4,
155	k7nops + 1 + 2 + 3 + 4 + 5,
156	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
157	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
158	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
159};
160#endif
161
162#ifdef P6_NOP1
163static const unsigned char p6nops[] =
164{
165	P6_NOP1,
166	P6_NOP2,
167	P6_NOP3,
168	P6_NOP4,
169	P6_NOP5,
170	P6_NOP6,
171	P6_NOP7,
172	P6_NOP8,
173	P6_NOP5_ATOMIC
174};
175static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
176{
177	NULL,
178	p6nops,
179	p6nops + 1,
180	p6nops + 1 + 2,
181	p6nops + 1 + 2 + 3,
182	p6nops + 1 + 2 + 3 + 4,
183	p6nops + 1 + 2 + 3 + 4 + 5,
184	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
185	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
186	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
187};
188#endif
189
190/* Initialize these to a safe default */
191#ifdef CONFIG_X86_64
192const unsigned char * const *ideal_nops = p6_nops;
193#else
194const unsigned char * const *ideal_nops = intel_nops;
195#endif
196
197void __init arch_init_ideal_nops(void)
198{
199	switch (boot_cpu_data.x86_vendor) {
200	case X86_VENDOR_INTEL:
201		/*
202		 * Due to a decoder implementation quirk, some
203		 * specific Intel CPUs actually perform better with
204		 * the "k8_nops" than with the SDM-recommended NOPs.
205		 */
206		if (boot_cpu_data.x86 == 6 &&
207		    boot_cpu_data.x86_model >= 0x0f &&
208		    boot_cpu_data.x86_model != 0x1c &&
209		    boot_cpu_data.x86_model != 0x26 &&
210		    boot_cpu_data.x86_model != 0x27 &&
211		    boot_cpu_data.x86_model < 0x30) {
212			ideal_nops = k8_nops;
213		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
214			   ideal_nops = p6_nops;
215		} else {
216#ifdef CONFIG_X86_64
217			ideal_nops = k8_nops;
218#else
219			ideal_nops = intel_nops;
220#endif
221		}
222		break;
223	default:
224#ifdef CONFIG_X86_64
225		ideal_nops = k8_nops;
226#else
227		if (boot_cpu_has(X86_FEATURE_K8))
228			ideal_nops = k8_nops;
229		else if (boot_cpu_has(X86_FEATURE_K7))
230			ideal_nops = k7_nops;
231		else
232			ideal_nops = intel_nops;
233#endif
234	}
235}
236
237/* Use this to add nops to a buffer, then text_poke the whole buffer. */
238static void __init_or_module add_nops(void *insns, unsigned int len)
239{
240	while (len > 0) {
241		unsigned int noplen = len;
242		if (noplen > ASM_NOP_MAX)
243			noplen = ASM_NOP_MAX;
244		memcpy(insns, ideal_nops[noplen], noplen);
245		insns += noplen;
246		len -= noplen;
247	}
248}
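
/*
 * Illustrative sketch (not part of the original file): ideal_nops[n]
 * points at an n-byte NOP encoding, so padding a patch buffer is a
 * matter of copying the right entries, which add_nops() above does in
 * chunks of at most ASM_NOP_MAX bytes.
 */
static void __init example_pad_with_nops(void)
{
	u8 buf[16];

	memcpy(buf, ideal_nops[5], 5);	/* one 5-byte NOP */
	add_nops(buf + 5, 11);		/* remainder: an 8-byte plus a 3-byte NOP */
}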
249
250extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
251extern s32 __smp_locks[], __smp_locks_end[];
252void *text_poke_early(void *addr, const void *opcode, size_t len);
253
254/* Replace instructions with better alternatives for this CPU type.
255   This runs before SMP is initialized to avoid SMP problems with
256   self modifying code. This implies that asymmetric systems where
 257   APs have fewer capabilities than the boot processor are not handled.
258   Tough. Make sure you disable such features by hand. */
259
260void __init_or_module apply_alternatives(struct alt_instr *start,
261					 struct alt_instr *end)
262{
263	struct alt_instr *a;
264	u8 *instr, *replacement;
265	u8 insnbuf[MAX_PATCH_LEN];
266
267	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
268	/*
269	 * The scan order should be from start to end. A later scanned
 270	 * alternative code can overwrite a previously scanned alternative code.
271	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
272	 * patch code.
273	 *
274	 * So be careful if you want to change the scan order to any other
275	 * order.
276	 */
277	for (a = start; a < end; a++) {
278		instr = (u8 *)&a->instr_offset + a->instr_offset;
279		replacement = (u8 *)&a->repl_offset + a->repl_offset;
280		BUG_ON(a->replacementlen > a->instrlen);
281		BUG_ON(a->instrlen > sizeof(insnbuf));
282		BUG_ON(a->cpuid >= NCAPINTS*32);
283		if (!boot_cpu_has(a->cpuid))
284			continue;
285
286		memcpy(insnbuf, replacement, a->replacementlen);
287
288		/* 0xe8 is a relative jump; fix the offset. */
289		if (*insnbuf == 0xe8 && a->replacementlen == 5)
290		    *(s32 *)(insnbuf + 1) += replacement - instr;
291
292		add_nops(insnbuf + a->replacementlen,
293			 a->instrlen - a->replacementlen);
294
295		text_poke_early(instr, insnbuf, a->instrlen);
296	}
297}
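
/*
 * Sketch of how entries land in __alt_instructions (illustrative, not
 * from this file): callers use the alternative() helper from
 * <asm/alternative.h>, which emits the original instruction inline and
 * records the replacement plus the CPUID feature bit in .altinstructions.
 * The feature flag below is only an example.
 */
static inline void example_rdtsc_barrier(void)
{
	/* Becomes MFENCE at boot only on CPUs advertising the feature. */
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
}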
298
299#ifdef CONFIG_SMP
300
301static void alternatives_smp_lock(const s32 *start, const s32 *end,
302				  u8 *text, u8 *text_end)
303{
304	const s32 *poff;
305
306	mutex_lock(&text_mutex);
307	for (poff = start; poff < end; poff++) {
308		u8 *ptr = (u8 *)poff + *poff;
309
310		if (!*poff || ptr < text || ptr >= text_end)
311			continue;
312		/* turn DS segment override prefix into lock prefix */
313		if (*ptr == 0x3e)
314			text_poke(ptr, ((unsigned char []){0xf0}), 1);
315	};
316	mutex_unlock(&text_mutex);
317}
318
319static void alternatives_smp_unlock(const s32 *start, const s32 *end,
320				    u8 *text, u8 *text_end)
321{
322	const s32 *poff;
323
324	if (noreplace_smp)
325		return;
326
327	mutex_lock(&text_mutex);
328	for (poff = start; poff < end; poff++) {
329		u8 *ptr = (u8 *)poff + *poff;
330
331		if (!*poff || ptr < text || ptr >= text_end)
332			continue;
333		/* turn lock prefix into DS segment override prefix */
334		if (*ptr == 0xf0)
335			text_poke(ptr, ((unsigned char []){0x3E}), 1);
336	};
337	mutex_unlock(&text_mutex);
338}
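
/*
 * Where __smp_locks comes from (approximate sketch, for illustration
 * only): LOCK_PREFIX in <asm/alternative.h> records the address of each
 * "lock" byte as a relative s32 in the .smp_locks section, which is
 * exactly what the two walkers above iterate over.
 */
#if 0	/* illustrative only, roughly what the real header does */
#define LOCK_PREFIX_HERE \
		".pushsection .smp_locks,\"a\"\n"	\
		".balign 4\n"				\
		".long 671f - .\n" /* offset */		\
		".popsection\n"				\
		"671:"

#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
#endif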
339
340struct smp_alt_module {
341	/* what is this ??? */
342	struct module	*mod;
343	char		*name;
344
345	/* ptrs to lock prefixes */
346	const s32	*locks;
347	const s32	*locks_end;
348
349	/* .text segment, needed to avoid patching init code ;) */
350	u8		*text;
351	u8		*text_end;
352
353	struct list_head next;
354};
355static LIST_HEAD(smp_alt_modules);
356static DEFINE_MUTEX(smp_alt);
357static int smp_mode = 1;	/* protected by smp_alt */
358
359void __init_or_module alternatives_smp_module_add(struct module *mod,
360						  char *name,
361						  void *locks, void *locks_end,
362						  void *text,  void *text_end)
363{
364	struct smp_alt_module *smp;
365
366	if (noreplace_smp)
367		return;
368
369	if (smp_alt_once) {
370		if (boot_cpu_has(X86_FEATURE_UP))
371			alternatives_smp_unlock(locks, locks_end,
372						text, text_end);
373		return;
374	}
375
376	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
377	if (NULL == smp)
378		return; /* we'll run the (safe but slow) SMP code then ... */
379
380	smp->mod	= mod;
381	smp->name	= name;
382	smp->locks	= locks;
383	smp->locks_end	= locks_end;
384	smp->text	= text;
385	smp->text_end	= text_end;
386	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
387		__func__, smp->locks, smp->locks_end,
388		smp->text, smp->text_end, smp->name);
389
390	mutex_lock(&smp_alt);
391	list_add_tail(&smp->next, &smp_alt_modules);
392	if (boot_cpu_has(X86_FEATURE_UP))
393		alternatives_smp_unlock(smp->locks, smp->locks_end,
394					smp->text, smp->text_end);
395	mutex_unlock(&smp_alt);
396}
397
398void __init_or_module alternatives_smp_module_del(struct module *mod)
399{
400	struct smp_alt_module *item;
401
402	if (smp_alt_once || noreplace_smp)
403		return;
404
405	mutex_lock(&smp_alt);
406	list_for_each_entry(item, &smp_alt_modules, next) {
407		if (mod != item->mod)
408			continue;
409		list_del(&item->next);
410		mutex_unlock(&smp_alt);
411		DPRINTK("%s: %s\n", __func__, item->name);
412		kfree(item);
413		return;
414	}
415	mutex_unlock(&smp_alt);
416}
417
418bool skip_smp_alternatives;
419void alternatives_smp_switch(int smp)
420{
421	struct smp_alt_module *mod;
422
423#ifdef CONFIG_LOCKDEP
424	/*
425	 * Older binutils section handling bug prevented
426	 * alternatives-replacement from working reliably.
427	 *
428	 * If this still occurs then you should see a hang
429	 * or crash shortly after this line:
430	 */
431	printk("lockdep: fixing up alternatives.\n");
432#endif
433
434	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
435		return;
436	BUG_ON(!smp && (num_online_cpus() > 1));
437
438	mutex_lock(&smp_alt);
439
440	/*
441	 * Avoid unnecessary switches because it forces JIT based VMs to
442	 * throw away all cached translations, which can be quite costly.
443	 */
444	if (smp == smp_mode) {
445		/* nothing */
446	} else if (smp) {
447		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
448		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
449		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
450		list_for_each_entry(mod, &smp_alt_modules, next)
451			alternatives_smp_lock(mod->locks, mod->locks_end,
452					      mod->text, mod->text_end);
453	} else {
454		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
455		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
456		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
457		list_for_each_entry(mod, &smp_alt_modules, next)
458			alternatives_smp_unlock(mod->locks, mod->locks_end,
459						mod->text, mod->text_end);
460	}
461	smp_mode = smp;
462	mutex_unlock(&smp_alt);
463}
464
465/* Return 1 if the address range is reserved for smp-alternatives */
466int alternatives_text_reserved(void *start, void *end)
467{
468	struct smp_alt_module *mod;
469	const s32 *poff;
470	u8 *text_start = start;
471	u8 *text_end = end;
472
473	list_for_each_entry(mod, &smp_alt_modules, next) {
474		if (mod->text > text_end || mod->text_end < text_start)
475			continue;
476		for (poff = mod->locks; poff < mod->locks_end; poff++) {
477			const u8 *ptr = (const u8 *)poff + *poff;
478
479			if (text_start <= ptr && text_end > ptr)
480				return 1;
481		}
482	}
483
484	return 0;
485}
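
/*
 * Sketch of a typical caller (illustrative): other patching machinery,
 * e.g. kprobes, checks this before touching kernel text so it never
 * rewrites a byte that the SMP-lock code above may patch later.
 */
static int example_can_patch(void *addr)
{
	if (alternatives_text_reserved(addr, addr))
		return -EBUSY;	/* owned by smp-alternatives */
	return 0;
}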
486#endif
487
488#ifdef CONFIG_PARAVIRT
489void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
490				     struct paravirt_patch_site *end)
491{
492	struct paravirt_patch_site *p;
493	char insnbuf[MAX_PATCH_LEN];
494
495	if (noreplace_paravirt)
496		return;
497
498	for (p = start; p < end; p++) {
499		unsigned int used;
500
501		BUG_ON(p->len > MAX_PATCH_LEN);
502		/* prep the buffer with the original instructions */
503		memcpy(insnbuf, p->instr, p->len);
504		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
505					 (unsigned long)p->instr, p->len);
506
507		BUG_ON(used > p->len);
508
509		/* Pad the rest with nops */
510		add_nops(insnbuf + used, p->len - used);
511		text_poke_early(p->instr, insnbuf, p->len);
512	}
513}
514extern struct paravirt_patch_site __start_parainstructions[],
515	__stop_parainstructions[];
516#endif	/* CONFIG_PARAVIRT */
517
518void __init alternative_instructions(void)
519{
520	/* The patching is not fully atomic, so try to avoid local interruptions
 521	   that might execute the code being patched.
522	   Other CPUs are not running. */
523	stop_nmi();
524
525	/*
526	 * Don't stop machine check exceptions while patching.
527	 * MCEs only happen when something got corrupted and in this
528	 * case we must do something about the corruption.
 529	 * Ignoring it is worse than an unlikely patching race.
530	 * Also machine checks tend to be broadcast and if one CPU
531	 * goes into machine check the others follow quickly, so we don't
 532	 * expect a machine check to cause undue problems during code
533	 * patching.
534	 */
535
536	apply_alternatives(__alt_instructions, __alt_instructions_end);
537
538	/* switch to patch-once-at-boottime-only mode and free the
539	 * tables in case we know the number of CPUs will never ever
540	 * change */
541#ifdef CONFIG_HOTPLUG_CPU
542	if (num_possible_cpus() < 2)
543		smp_alt_once = 1;
544#endif
545
546#ifdef CONFIG_SMP
547	if (smp_alt_once) {
548		if (1 == num_possible_cpus()) {
549			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
550			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
551			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
552
553			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
554						_text, _etext);
555		}
556	} else {
557		alternatives_smp_module_add(NULL, "core kernel",
558					    __smp_locks, __smp_locks_end,
559					    _text, _etext);
560
561		/* Only switch to UP mode if we don't immediately boot others */
562		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
563			alternatives_smp_switch(0);
564	}
565#endif
566 	apply_paravirt(__parainstructions, __parainstructions_end);
567
568	if (smp_alt_once)
569		free_init_pages("SMP alternatives",
570				(unsigned long)__smp_locks,
571				(unsigned long)__smp_locks_end);
572
573	restart_nmi();
574}
575
576/**
577 * text_poke_early - Update instructions on a live kernel at boot time
578 * @addr: address to modify
579 * @opcode: source of the copy
580 * @len: length to copy
581 *
582 * When you use this code to patch more than one byte of an instruction
583 * you need to make sure that other CPUs cannot execute this code in parallel.
584 * Also no thread must be currently preempted in the middle of these
 585	 * instructions. And on the local CPU you need to be protected against NMI or MCE
586 * handlers seeing an inconsistent instruction while you patch.
587 */
588void *__init_or_module text_poke_early(void *addr, const void *opcode,
589					      size_t len)
590{
591	unsigned long flags;
592	local_irq_save(flags);
593	memcpy(addr, opcode, len);
594	sync_core();
595	local_irq_restore(flags);
596	/* Could also do a CLFLUSH here to speed up CPU recovery; but
597	   that causes hangs on some VIA CPUs. */
598	return addr;
599}
600
601/**
602 * text_poke - Update instructions on a live kernel
603 * @addr: address to modify
604 * @opcode: source of the copy
605 * @len: length to copy
606 *
607 * Only atomic text poke/set should be allowed when not doing early patching.
608 * It means the size must be writable atomically and the address must be aligned
609 * in a way that permits an atomic write. It also makes sure we fit on a single
610 * page.
611 *
612 * Note: Must be called under text_mutex.
613 */
614void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
615{
616	unsigned long flags;
617	char *vaddr;
618	struct page *pages[2];
619	int i;
620
621	if (!core_kernel_text((unsigned long)addr)) {
622		pages[0] = vmalloc_to_page(addr);
623		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
624	} else {
625		pages[0] = virt_to_page(addr);
626		WARN_ON(!PageReserved(pages[0]));
627		pages[1] = virt_to_page(addr + PAGE_SIZE);
628	}
629	BUG_ON(!pages[0]);
630	local_irq_save(flags);
631	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
632	if (pages[1])
633		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
634	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
635	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
636	clear_fixmap(FIX_TEXT_POKE0);
637	if (pages[1])
638		clear_fixmap(FIX_TEXT_POKE1);
639	local_flush_tlb();
640	sync_core();
641	/* Could also do a CLFLUSH here to speed up CPU recovery; but
642	   that causes hangs on some VIA CPUs. */
643	for (i = 0; i < len; i++)
644		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
645	local_irq_restore(flags);
646	return addr;
647}
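
/*
 * Sketch of the calling convention documented above (illustrative): a
 * single-byte poke performed under text_mutex, the way breakpoint-based
 * patchers typically arm an INT3.
 */
static void example_arm_breakpoint(void *addr)
{
	static const u8 int3 = 0xcc;

	mutex_lock(&text_mutex);
	text_poke(addr, &int3, 1);
	mutex_unlock(&text_mutex);
}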
648
649/*
650 * Cross-modifying kernel text with stop_machine().
651 * This code originally comes from immediate value.
652 */
653static atomic_t stop_machine_first;
654static int wrote_text;
655
656struct text_poke_params {
657	struct text_poke_param *params;
658	int nparams;
659};
660
661static int __kprobes stop_machine_text_poke(void *data)
662{
663	struct text_poke_params *tpp = data;
664	struct text_poke_param *p;
665	int i;
666
667	if (atomic_dec_and_test(&stop_machine_first)) {
668		for (i = 0; i < tpp->nparams; i++) {
669			p = &tpp->params[i];
670			text_poke(p->addr, p->opcode, p->len);
671		}
672		smp_wmb();	/* Make sure other cpus see that this has run */
673		wrote_text = 1;
674	} else {
675		while (!wrote_text)
676			cpu_relax();
677		smp_mb();	/* Load wrote_text before following execution */
678	}
679
680	for (i = 0; i < tpp->nparams; i++) {
681		p = &tpp->params[i];
682		flush_icache_range((unsigned long)p->addr,
683				   (unsigned long)p->addr + p->len);
684	}
685	/*
 686	 * Intel Architecture Software Developer's Manual section 7.1.3 specifies
687	 * that a core serializing instruction such as "cpuid" should be
688	 * executed on _each_ core before the new instruction is made visible.
689	 */
690	sync_core();
691	return 0;
692}
693
694/**
695 * text_poke_smp - Update instructions on a live kernel on SMP
696 * @addr: address to modify
697 * @opcode: source of the copy
698 * @len: length to copy
699 *
700 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
 701 * the user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
702 * should be allowed, since stop_machine() does _not_ protect code against
703 * NMI and MCE.
704 *
705 * Note: Must be called under get_online_cpus() and text_mutex.
706 */
707void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
708{
709	struct text_poke_params tpp;
710	struct text_poke_param p;
711
712	p.addr = addr;
713	p.opcode = opcode;
714	p.len = len;
715	tpp.params = &p;
716	tpp.nparams = 1;
717	atomic_set(&stop_machine_first, 1);
718	wrote_text = 0;
719	/* Use __stop_machine() because the caller already got online_cpus. */
720	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
721	return addr;
722}
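
/*
 * Sketch of the documented calling convention (illustrative): the caller
 * holds both get_online_cpus() and text_mutex around the poke.
 */
static void example_patch_live(void *addr, const void *insn, size_t len)
{
	get_online_cpus();
	mutex_lock(&text_mutex);
	text_poke_smp(addr, insn, len);
	mutex_unlock(&text_mutex);
	put_online_cpus();
}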
723
724/**
725 * text_poke_smp_batch - Update instructions on a live kernel on SMP
726 * @params: an array of text_poke parameters
727 * @n: the number of elements in params.
728 *
729 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
 730 * stop_machine() is a heavy task, it is better to aggregate text_poke requests
731 * and do it once if possible.
732 *
733 * Note: Must be called under get_online_cpus() and text_mutex.
734 */
735void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
736{
737	struct text_poke_params tpp = {.params = params, .nparams = n};
738
739	atomic_set(&stop_machine_first, 1);
740	wrote_text = 0;
741	__stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
742}
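
/*
 * Sketch of batched usage (illustrative): aggregate several pokes so the
 * cost of stop_machine() is paid only once; same locking rules as above.
 */
static void example_patch_two_sites(void *a1, const void *i1, size_t l1,
				    void *a2, const void *i2, size_t l2)
{
	struct text_poke_param params[] = {
		{ .addr = a1, .opcode = i1, .len = l1 },
		{ .addr = a2, .opcode = i2, .len = l2 },
	};

	text_poke_smp_batch(params, ARRAY_SIZE(params));
}
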
v6.8
   1// SPDX-License-Identifier: GPL-2.0-only
   2#define pr_fmt(fmt) "SMP alternatives: " fmt
   3
   4#include <linux/module.h>
   5#include <linux/sched.h>
   6#include <linux/perf_event.h>
   7#include <linux/mutex.h>
   8#include <linux/list.h>
   9#include <linux/stringify.h>
  10#include <linux/highmem.h>
  11#include <linux/mm.h>
  12#include <linux/vmalloc.h>
  13#include <linux/memory.h>
  14#include <linux/stop_machine.h>
  15#include <linux/slab.h>
  16#include <linux/kdebug.h>
  17#include <linux/kprobes.h>
  18#include <linux/mmu_context.h>
  19#include <linux/bsearch.h>
  20#include <linux/sync_core.h>
  21#include <asm/text-patching.h>
  22#include <asm/alternative.h>
  23#include <asm/sections.h>
  24#include <asm/mce.h>
  25#include <asm/nmi.h>
  26#include <asm/cacheflush.h>
  27#include <asm/tlbflush.h>
  28#include <asm/insn.h>
  29#include <asm/io.h>
  30#include <asm/fixmap.h>
  31#include <asm/paravirt.h>
  32#include <asm/asm-prototypes.h>
  33#include <asm/cfi.h>
  34
  35int __read_mostly alternatives_patched;
  36
  37EXPORT_SYMBOL_GPL(alternatives_patched);
  38
  39#define MAX_PATCH_LEN (255-1)
  40
  41#define DA_ALL		(~0)
  42#define DA_ALT		0x01
  43#define DA_RET		0x02
  44#define DA_RETPOLINE	0x04
  45#define DA_ENDBR	0x08
  46#define DA_SMP		0x10
  47
  48static unsigned int __initdata_or_module debug_alternative;
  49
  50static int __init debug_alt(char *str)
  51{
  52	if (str && *str == '=')
  53		str++;
  54
  55	if (!str || kstrtouint(str, 0, &debug_alternative))
  56		debug_alternative = DA_ALL;
  57
  58	return 1;
  59}
  60__setup("debug-alternative", debug_alt);
  61
  62static int noreplace_smp;
  63
  64static int __init setup_noreplace_smp(char *str)
  65{
  66	noreplace_smp = 1;
  67	return 1;
  68}
  69__setup("noreplace-smp", setup_noreplace_smp);
  70
  71#define DPRINTK(type, fmt, args...)					\
  72do {									\
  73	if (debug_alternative & DA_##type)				\
  74		printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args);		\
  75} while (0)
  76
  77#define DUMP_BYTES(type, buf, len, fmt, args...)			\
  78do {									\
  79	if (unlikely(debug_alternative & DA_##type)) {			\
  80		int j;							\
  81									\
  82		if (!(len))						\
  83			break;						\
  84									\
  85		printk(KERN_DEBUG pr_fmt(fmt), ##args);			\
  86		for (j = 0; j < (len) - 1; j++)				\
  87			printk(KERN_CONT "%02hhx ", buf[j]);		\
  88		printk(KERN_CONT "%02hhx\n", buf[j]);			\
  89	}								\
  90} while (0)
  91
  92static const unsigned char x86nops[] =
  93{
  94	BYTES_NOP1,
  95	BYTES_NOP2,
  96	BYTES_NOP3,
  97	BYTES_NOP4,
  98	BYTES_NOP5,
  99	BYTES_NOP6,
 100	BYTES_NOP7,
 101	BYTES_NOP8,
 102#ifdef CONFIG_64BIT
 103	BYTES_NOP9,
 104	BYTES_NOP10,
 105	BYTES_NOP11,
 106#endif
 107};
 108
 109const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
 110{
 111	NULL,
 112	x86nops,
 113	x86nops + 1,
 114	x86nops + 1 + 2,
 115	x86nops + 1 + 2 + 3,
 116	x86nops + 1 + 2 + 3 + 4,
 117	x86nops + 1 + 2 + 3 + 4 + 5,
 118	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
 119	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 120#ifdef CONFIG_64BIT
 121	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
 122	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9,
 123	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10,
 124#endif
 125};
 126
 127/*
 128 * Fill the buffer with a single effective instruction of size @len.
 129 *
 130 * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
 131 * for every single-byte NOP, try to generate the maximally available NOP of
 132 * size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
  133 * each single-byte NOP). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
 134 * *jump* over instead of executing long and daft NOPs.
 135 */
 136static void __init_or_module add_nop(u8 *instr, unsigned int len)
 137{
 138	u8 *target = instr + len;
 139
 140	if (!len)
 141		return;
 142
 143	if (len <= ASM_NOP_MAX) {
 144		memcpy(instr, x86_nops[len], len);
 145		return;
 146	}
 147
 148	if (len < 128) {
 149		__text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
 150		instr += JMP8_INSN_SIZE;
 151	} else {
 152		__text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
 153		instr += JMP32_INSN_SIZE;
 154	}
 155
 156	for (;instr < target; instr++)
 157		*instr = INT3_INSN_OPCODE;
 158}
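
/*
 * Worked example (illustrative, 64-bit build assumed): add_nop(buf, 32)
 * cannot use a single NOP, so it emits
 *
 *	eb 1e			jmp  .+32	; skip over the padding
 *	cc cc ... cc  (30x)	int3		; never executed
 *
 * since rel8 = 32 - JMP8_INSN_SIZE = 0x1e, while add_nop(buf, 8) simply
 * copies x86_nops[8].
 */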
 159
 160extern s32 __retpoline_sites[], __retpoline_sites_end[];
 161extern s32 __return_sites[], __return_sites_end[];
 162extern s32 __cfi_sites[], __cfi_sites_end[];
 163extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
 164extern s32 __smp_locks[], __smp_locks_end[];
 165void text_poke_early(void *addr, const void *opcode, size_t len);
 166
 167/*
 168 * Matches NOP and NOPL, not any of the other possible NOPs.
 169 */
 170static bool insn_is_nop(struct insn *insn)
 171{
 172	/* Anything NOP, but no REP NOP */
 173	if (insn->opcode.bytes[0] == 0x90 &&
 174	    (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
 175		return true;
 176
 177	/* NOPL */
 178	if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
 179		return true;
 180
 181	/* TODO: more nops */
 182
 183	return false;
 184}
 185
 186/*
 187 * Find the offset of the first non-NOP instruction starting at @offset
 188 * but no further than @len.
 189 */
 190static int skip_nops(u8 *instr, int offset, int len)
 191{
 192	struct insn insn;
 193
 194	for (; offset < len; offset += insn.length) {
 195		if (insn_decode_kernel(&insn, &instr[offset]))
 196			break;
 197
 198		if (!insn_is_nop(&insn))
 199			break;
 200	}
 201
 202	return offset;
 203}
 204
 205/*
 206 * Optimize a sequence of NOPs, possibly preceded by an unconditional jump
 207 * to the end of the NOP sequence into a single NOP.
 208 */
 209static bool __init_or_module
 210__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
 211{
 212	int i = *next - insn->length;
 213
 214	switch (insn->opcode.bytes[0]) {
 215	case JMP8_INSN_OPCODE:
 216	case JMP32_INSN_OPCODE:
 217		*prev = i;
 218		*target = *next + insn->immediate.value;
 219		return false;
 220	}
 221
 222	if (insn_is_nop(insn)) {
 223		int nop = i;
 224
 225		*next = skip_nops(instr, *next, len);
 226		if (*target && *next == *target)
 227			nop = *prev;
 228
 229		add_nop(instr + nop, *next - nop);
 230		DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
 231		return true;
 232	}
 233
 234	*target = 0;
 235	return false;
 236}
 237
 238/*
 239 * "noinline" to cause control flow change and thus invalidate I$ and
 240 * cause refetch after modification.
 241 */
 242static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
 243{
 244	int prev, target = 0;
 245
 246	for (int next, i = 0; i < len; i = next) {
 247		struct insn insn;
 248
 249		if (insn_decode_kernel(&insn, &instr[i]))
 250			return;
 251
 252		next = i + insn.length;
 253
 254		__optimize_nops(instr, len, &insn, &next, &prev, &target);
 255	}
 256}
 257
 258static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
 259{
 260	unsigned long flags;
 261
 262	local_irq_save(flags);
 263	optimize_nops(instr, len);
 264	sync_core();
 265	local_irq_restore(flags);
 266}
 267
 268/*
 269 * In this context, "source" is where the instructions are placed in the
 270 * section .altinstr_replacement, for example during kernel build by the
 271 * toolchain.
 272 * "Destination" is where the instructions are being patched in by this
 273 * machinery.
 274 *
 275 * The source offset is:
 276 *
 277 *   src_imm = target - src_next_ip                  (1)
 278 *
 279 * and the target offset is:
 280 *
 281 *   dst_imm = target - dst_next_ip                  (2)
 282 *
 283 * so rework (1) as an expression for target like:
 284 *
 285 *   target = src_imm + src_next_ip                  (1a)
 286 *
 287 * and substitute in (2) to get:
 288 *
 289 *   dst_imm = (src_imm + src_next_ip) - dst_next_ip (3)
 290 *
 291 * Now, since the instruction stream is 'identical' at src and dst (it
 292 * is being copied after all) it can be stated that:
 293 *
 294 *   src_next_ip = src + ip_offset
 295 *   dst_next_ip = dst + ip_offset                   (4)
 296 *
 297 * Substitute (4) in (3) and observe ip_offset being cancelled out to
 298 * obtain:
 299 *
 300 *   dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset)
 301 *           = src_imm + src - dst + ip_offset - ip_offset
 302 *           = src_imm + src - dst                   (5)
 303 *
 304 * IOW, only the relative displacement of the code block matters.
 305 */
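
/*
 * Worked example of (5) with made-up addresses: a CALL assembled in
 * .altinstr_replacement at src = 0x1000 with src_imm = 0x500 targets
 * 0x1000 + 5 + 0x500 = 0x1505.  If its bytes are patched in at
 * dst = 0x2000, the target must not move, so
 *
 *	dst_imm = src_imm + src - dst = 0x500 + 0x1000 - 0x2000 = -0xb00
 *
 * and indeed 0x2000 + 5 + (-0xb00) = 0x1505.
 */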
 306
 307#define apply_reloc_n(n_, p_, d_)				\
 308	do {							\
 309		s32 v = *(s##n_ *)(p_);				\
 310		v += (d_);					\
 311		BUG_ON((v >> 31) != (v >> (n_-1)));		\
 312		*(s##n_ *)(p_) = (s##n_)v;			\
 313	} while (0)
 314
 315
 316static __always_inline
 317void apply_reloc(int n, void *ptr, uintptr_t diff)
 318{
 319	switch (n) {
 320	case 1: apply_reloc_n(8, ptr, diff); break;
 321	case 2: apply_reloc_n(16, ptr, diff); break;
 322	case 4: apply_reloc_n(32, ptr, diff); break;
 323	default: BUG();
 324	}
 325}
 326
 327static __always_inline
 328bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
 329{
 330	u8 *target = src + offset;
 331	/*
 332	 * If the target is inside the patched block, it's relative to the
 333	 * block itself and does not need relocation.
 334	 */
 335	return (target < src || target > src + src_len);
 336}
 337
 338static void __init_or_module noinline
 339apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 340{
 341	int prev, target = 0;
 342
 343	for (int next, i = 0; i < len; i = next) {
 344		struct insn insn;
 345
 346		if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
 347			return;
 348
 349		next = i + insn.length;
 350
 351		if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
 352			continue;
 353
 354		switch (insn.opcode.bytes[0]) {
 355		case 0x0f:
 356			if (insn.opcode.bytes[1] < 0x80 ||
 357			    insn.opcode.bytes[1] > 0x8f)
 358				break;
 359
 360			fallthrough;	/* Jcc.d32 */
 361		case 0x70 ... 0x7f:	/* Jcc.d8 */
 362		case JMP8_INSN_OPCODE:
 363		case JMP32_INSN_OPCODE:
 364		case CALL_INSN_OPCODE:
 365			if (need_reloc(next + insn.immediate.value, src, src_len)) {
 366				apply_reloc(insn.immediate.nbytes,
 367					    buf + i + insn_offset_immediate(&insn),
 368					    src - dest);
 369			}
 370
 371			/*
 372			 * Where possible, convert JMP.d32 into JMP.d8.
 373			 */
 374			if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) {
 375				s32 imm = insn.immediate.value;
 376				imm += src - dest;
 377				imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE;
 378				if ((imm >> 31) == (imm >> 7)) {
 379					buf[i+0] = JMP8_INSN_OPCODE;
 380					buf[i+1] = (s8)imm;
 381
 382					memset(&buf[i+2], INT3_INSN_OPCODE, insn.length - 2);
 383				}
 384			}
 385			break;
 386		}
 387
 388		if (insn_rip_relative(&insn)) {
 389			if (need_reloc(next + insn.displacement.value, src, src_len)) {
 390				apply_reloc(insn.displacement.nbytes,
 391					    buf + i + insn_offset_displacement(&insn),
 392					    src - dest);
 393			}
 394		}
 395	}
 396}
 397
 398/* Low-level backend functions usable from alternative code replacements. */
 399DEFINE_ASM_FUNC(nop_func, "", .entry.text);
 400EXPORT_SYMBOL_GPL(nop_func);
 401
 402noinstr void BUG_func(void)
 403{
 404	BUG();
 405}
 406EXPORT_SYMBOL(BUG_func);
 407
 408#define CALL_RIP_REL_OPCODE	0xff
 409#define CALL_RIP_REL_MODRM	0x15
 410
 411/*
 412 * Rewrite the "call BUG_func" replacement to point to the target of the
 413 * indirect pv_ops call "call *disp(%ip)".
 414 */
 415static int alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
 416{
 417	void *target, *bug = &BUG_func;
 418	s32 disp;
 419
 420	if (a->replacementlen != 5 || insn_buff[0] != CALL_INSN_OPCODE) {
 421		pr_err("ALT_FLAG_DIRECT_CALL set for a non-call replacement instruction\n");
 422		BUG();
 423	}
 424
 425	if (a->instrlen != 6 ||
 426	    instr[0] != CALL_RIP_REL_OPCODE ||
 427	    instr[1] != CALL_RIP_REL_MODRM) {
 428		pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
 429		BUG();
 430	}
 431
 432	/* Skip CALL_RIP_REL_OPCODE and CALL_RIP_REL_MODRM */
 433	disp = *(s32 *)(instr + 2);
 434#ifdef CONFIG_X86_64
 435	/* ff 15 00 00 00 00   call   *0x0(%rip) */
 436	/* target address is stored at "next instruction + disp". */
 437	target = *(void **)(instr + a->instrlen + disp);
 438#else
 439	/* ff 15 00 00 00 00   call   *0x0 */
 440	/* target address is stored at disp. */
 441	target = *(void **)disp;
 442#endif
 443	if (!target)
 444		target = bug;
 445
 446	/* (BUG_func - .) + (target - BUG_func) := target - . */
 447	*(s32 *)(insn_buff + 1) += target - bug;
 448
 449	if (target == &nop_func)
 450		return 0;
 451
 452	return 5;
 453}
 454
 455/*
 456 * Replace instructions with better alternatives for this CPU type. This runs
 457 * before SMP is initialized to avoid SMP problems with self modifying code.
  458 * This implies that asymmetric systems where APs have fewer capabilities than
 459 * the boot processor are not handled. Tough. Make sure you disable such
 460 * features by hand.
 461 *
 462 * Marked "noinline" to cause control flow change and thus insn cache
 463 * to refetch changed I$ lines.
 464 */
 465void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 466						  struct alt_instr *end)
 467{
 468	struct alt_instr *a;
 469	u8 *instr, *replacement;
 470	u8 insn_buff[MAX_PATCH_LEN];
 471
 472	DPRINTK(ALT, "alt table %px, -> %px", start, end);
 473
 474	/*
 475	 * In the case CONFIG_X86_5LEVEL=y, KASAN_SHADOW_START is defined using
 476	 * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here.
 477	 * During the process, KASAN becomes confused seeing partial LA57
 478	 * conversion and triggers a false-positive out-of-bound report.
 479	 *
 480	 * Disable KASAN until the patching is complete.
 481	 */
 482	kasan_disable_current();
 483
 484	/*
 485	 * The scan order should be from start to end. A later scanned
 486	 * alternative code can overwrite previously scanned alternative code.
 487	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
 488	 * patch code.
 489	 *
 490	 * So be careful if you want to change the scan order to any other
 491	 * order.
 492	 */
 493	for (a = start; a < end; a++) {
 494		int insn_buff_sz = 0;
 495
 496		instr = (u8 *)&a->instr_offset + a->instr_offset;
 497		replacement = (u8 *)&a->repl_offset + a->repl_offset;
 498		BUG_ON(a->instrlen > sizeof(insn_buff));
 499		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
 500
 501		/*
 502		 * Patch if either:
 503		 * - feature is present
 504		 * - feature not present but ALT_FLAG_NOT is set to mean,
 505		 *   patch if feature is *NOT* present.
 506		 */
 507		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
 508			optimize_nops_inplace(instr, a->instrlen);
 509			continue;
 510		}
 511
 512		DPRINTK(ALT, "feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d) flags: 0x%x",
 513			a->cpuid >> 5,
 514			a->cpuid & 0x1f,
 515			instr, instr, a->instrlen,
 516			replacement, a->replacementlen, a->flags);
 517
 518		memcpy(insn_buff, replacement, a->replacementlen);
 519		insn_buff_sz = a->replacementlen;
 520
 521		if (a->flags & ALT_FLAG_DIRECT_CALL) {
 522			insn_buff_sz = alt_replace_call(instr, insn_buff, a);
 523			if (insn_buff_sz < 0)
 524				continue;
 525		}
 526
 527		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
 528			insn_buff[insn_buff_sz] = 0x90;
 529
 530		apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
 531
 532		DUMP_BYTES(ALT, instr, a->instrlen, "%px:   old_insn: ", instr);
 533		DUMP_BYTES(ALT, replacement, a->replacementlen, "%px:   rpl_insn: ", replacement);
 534		DUMP_BYTES(ALT, insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 535
 536		text_poke_early(instr, insn_buff, insn_buff_sz);
 537	}
 538
 539	kasan_enable_current();
 540}
 541
 542static inline bool is_jcc32(struct insn *insn)
 543{
 544	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
 545	return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
 546}
 547
 548#if defined(CONFIG_RETPOLINE) && defined(CONFIG_OBJTOOL)
 549
 550/*
 551 * CALL/JMP *%\reg
 552 */
 553static int emit_indirect(int op, int reg, u8 *bytes)
 554{
 555	int i = 0;
 556	u8 modrm;
 557
 558	switch (op) {
 559	case CALL_INSN_OPCODE:
 560		modrm = 0x10; /* Reg = 2; CALL r/m */
 561		break;
 562
 563	case JMP32_INSN_OPCODE:
 564		modrm = 0x20; /* Reg = 4; JMP r/m */
 565		break;
 566
 567	default:
 568		WARN_ON_ONCE(1);
 569		return -1;
 570	}
 571
 572	if (reg >= 8) {
 573		bytes[i++] = 0x41; /* REX.B prefix */
 574		reg -= 8;
 575	}
 576
 577	modrm |= 0xc0; /* Mod = 3 */
 578	modrm += reg;
 579
 580	bytes[i++] = 0xff; /* opcode */
 581	bytes[i++] = modrm;
 582
 583	return i;
 584}
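
/*
 * Example encodings produced by emit_indirect() (illustrative):
 *
 *	emit_indirect(CALL_INSN_OPCODE,  3, bytes)  -> ff d3      call *%rbx
 *	emit_indirect(JMP32_INSN_OPCODE, 11, bytes) -> 41 ff e3   jmp  *%r11
 */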
 585
 586static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
 587{
 588	u8 op = insn->opcode.bytes[0];
 589	int i = 0;
 590
 591	/*
 592	 * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
 593	 * tail-calls. Deal with them.
 594	 */
 595	if (is_jcc32(insn)) {
 596		bytes[i++] = op;
 597		op = insn->opcode.bytes[1];
 598		goto clang_jcc;
 599	}
 600
 601	if (insn->length == 6)
 602		bytes[i++] = 0x2e; /* CS-prefix */
 603
 604	switch (op) {
 605	case CALL_INSN_OPCODE:
 606		__text_gen_insn(bytes+i, op, addr+i,
 607				__x86_indirect_call_thunk_array[reg],
 608				CALL_INSN_SIZE);
 609		i += CALL_INSN_SIZE;
 610		break;
 611
 612	case JMP32_INSN_OPCODE:
 613clang_jcc:
 614		__text_gen_insn(bytes+i, op, addr+i,
 615				__x86_indirect_jump_thunk_array[reg],
 616				JMP32_INSN_SIZE);
 617		i += JMP32_INSN_SIZE;
 618		break;
 619
 620	default:
 621		WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
 622		return -1;
 623	}
 624
 625	WARN_ON_ONCE(i != insn->length);
 626
 627	return i;
 628}
 629
 630/*
 631 * Rewrite the compiler generated retpoline thunk calls.
 632 *
 633 * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
 634 * indirect instructions, avoiding the extra indirection.
 635 *
 636 * For example, convert:
 637 *
 638 *   CALL __x86_indirect_thunk_\reg
 639 *
 640 * into:
 641 *
 642 *   CALL *%\reg
 643 *
 644 * It also tries to inline spectre_v2=retpoline,lfence when size permits.
 645 */
 646static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 647{
 648	retpoline_thunk_t *target;
 649	int reg, ret, i = 0;
 650	u8 op, cc;
 651
 652	target = addr + insn->length + insn->immediate.value;
 653	reg = target - __x86_indirect_thunk_array;
 654
 655	if (WARN_ON_ONCE(reg & ~0xf))
 656		return -1;
 657
 658	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
 659	BUG_ON(reg == 4);
 660
 661	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
 662	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
 663		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
 664			return emit_call_track_retpoline(addr, insn, reg, bytes);
 665
 666		return -1;
 667	}
 668
 669	op = insn->opcode.bytes[0];
 670
 671	/*
 672	 * Convert:
 673	 *
 674	 *   Jcc.d32 __x86_indirect_thunk_\reg
 675	 *
 676	 * into:
 677	 *
 678	 *   Jncc.d8 1f
 679	 *   [ LFENCE ]
 680	 *   JMP *%\reg
 681	 *   [ NOP ]
 682	 * 1:
 683	 */
 684	if (is_jcc32(insn)) {
 685		cc = insn->opcode.bytes[1] & 0xf;
 686		cc ^= 1; /* invert condition */
 687
 688		bytes[i++] = 0x70 + cc;        /* Jcc.d8 */
 689		bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */
 690
 691		/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
 692		op = JMP32_INSN_OPCODE;
 693	}
 694
 695	/*
 696	 * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
 697	 */
 698	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
 699		bytes[i++] = 0x0f;
 700		bytes[i++] = 0xae;
 701		bytes[i++] = 0xe8; /* LFENCE */
 702	}
 703
 704	ret = emit_indirect(op, reg, bytes + i);
 705	if (ret < 0)
 706		return ret;
 707	i += ret;
 708
 709	/*
 710	 * The compiler is supposed to EMIT an INT3 after every unconditional
 711	 * JMP instruction due to AMD BTC. However, if the compiler is too old
 712	 * or SLS isn't enabled, we still need an INT3 after indirect JMPs
 713	 * even on Intel.
 714	 */
 715	if (op == JMP32_INSN_OPCODE && i < insn->length)
 716		bytes[i++] = INT3_INSN_OPCODE;
 717
 718	for (; i < insn->length;)
 719		bytes[i++] = BYTES_NOP1;
 720
 721	return i;
 722}
 723
 724/*
 725 * Generated by 'objtool --retpoline'.
 726 */
 727void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 728{
 729	s32 *s;
 730
 731	for (s = start; s < end; s++) {
 732		void *addr = (void *)s + *s;
 733		struct insn insn;
 734		int len, ret;
 735		u8 bytes[16];
 736		u8 op1, op2;
 737
 738		ret = insn_decode_kernel(&insn, addr);
 739		if (WARN_ON_ONCE(ret < 0))
 740			continue;
 741
 742		op1 = insn.opcode.bytes[0];
 743		op2 = insn.opcode.bytes[1];
 744
 745		switch (op1) {
 746		case CALL_INSN_OPCODE:
 747		case JMP32_INSN_OPCODE:
 748			break;
 749
 750		case 0x0f: /* escape */
 751			if (op2 >= 0x80 && op2 <= 0x8f)
 752				break;
 753			fallthrough;
 754		default:
 755			WARN_ON_ONCE(1);
 756			continue;
 757		}
 758
 759		DPRINTK(RETPOLINE, "retpoline at: %pS (%px) len: %d to: %pS",
 760			addr, addr, insn.length,
 761			addr + insn.length + insn.immediate.value);
 762
 763		len = patch_retpoline(addr, &insn, bytes);
 764		if (len == insn.length) {
 765			optimize_nops(bytes, len);
 766			DUMP_BYTES(RETPOLINE, ((u8*)addr),  len, "%px: orig: ", addr);
 767			DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
 768			text_poke_early(addr, bytes, len);
 769		}
 770	}
 771}
 772
 773#ifdef CONFIG_RETHUNK
 774
 775/*
 776 * Rewrite the compiler generated return thunk tail-calls.
 777 *
 778 * For example, convert:
 779 *
 780 *   JMP __x86_return_thunk
 781 *
 782 * into:
 783 *
 784 *   RET
 785 */
 786static int patch_return(void *addr, struct insn *insn, u8 *bytes)
 787{
 788	int i = 0;
 789
 790	/* Patch the custom return thunks... */
 791	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
 792		i = JMP32_INSN_SIZE;
 793		__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
 794	} else {
 795		/* ... or patch them out if not needed. */
 796		bytes[i++] = RET_INSN_OPCODE;
 797	}
 798
 799	for (; i < insn->length;)
 800		bytes[i++] = INT3_INSN_OPCODE;
 801	return i;
 802}
 803
 804void __init_or_module noinline apply_returns(s32 *start, s32 *end)
 805{
 806	s32 *s;
 807
 808	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
 809		static_call_force_reinit();
 810
 811	for (s = start; s < end; s++) {
 812		void *dest = NULL, *addr = (void *)s + *s;
 813		struct insn insn;
 814		int len, ret;
 815		u8 bytes[16];
 816		u8 op;
 817
 818		ret = insn_decode_kernel(&insn, addr);
 819		if (WARN_ON_ONCE(ret < 0))
 820			continue;
 821
 822		op = insn.opcode.bytes[0];
 823		if (op == JMP32_INSN_OPCODE)
 824			dest = addr + insn.length + insn.immediate.value;
 825
 826		if (__static_call_fixup(addr, op, dest) ||
 827		    WARN_ONCE(dest != &__x86_return_thunk,
 828			      "missing return thunk: %pS-%pS: %*ph",
 829			      addr, dest, 5, addr))
 830			continue;
 831
 832		DPRINTK(RET, "return thunk at: %pS (%px) len: %d to: %pS",
 833			addr, addr, insn.length,
 834			addr + insn.length + insn.immediate.value);
 835
 836		len = patch_return(addr, &insn, bytes);
 837		if (len == insn.length) {
 838			DUMP_BYTES(RET, ((u8*)addr),  len, "%px: orig: ", addr);
 839			DUMP_BYTES(RET, ((u8*)bytes), len, "%px: repl: ", addr);
 840			text_poke_early(addr, bytes, len);
 841		}
 842	}
 843}
 844#else
 845void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
 846#endif /* CONFIG_RETHUNK */
 847
 848#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
 849
 850void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
 851void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
 852
 853#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
 854
 855#ifdef CONFIG_X86_KERNEL_IBT
 856
 857static void poison_cfi(void *addr);
 858
 859static void __init_or_module poison_endbr(void *addr, bool warn)
 860{
 861	u32 endbr, poison = gen_endbr_poison();
 862
 863	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
 864		return;
 865
 866	if (!is_endbr(endbr)) {
 867		WARN_ON_ONCE(warn);
 868		return;
 869	}
 870
 871	DPRINTK(ENDBR, "ENDBR at: %pS (%px)", addr, addr);
 872
 873	/*
 874	 * When we have IBT, the lack of ENDBR will trigger #CP
 875	 */
 876	DUMP_BYTES(ENDBR, ((u8*)addr), 4, "%px: orig: ", addr);
 877	DUMP_BYTES(ENDBR, ((u8*)&poison), 4, "%px: repl: ", addr);
 878	text_poke_early(addr, &poison, 4);
 879}
 880
 881/*
 882 * Generated by: objtool --ibt
 883 *
 884 * Seal the functions for indirect calls by clobbering the ENDBR instructions
 885 * and the kCFI hash value.
 886 */
 887void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
 888{
 889	s32 *s;
 890
 891	for (s = start; s < end; s++) {
 892		void *addr = (void *)s + *s;
 893
 894		poison_endbr(addr, true);
 895		if (IS_ENABLED(CONFIG_FINEIBT))
 896			poison_cfi(addr - 16);
 897	}
 898}
 899
 900#else
 901
 902void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
 903
 904#endif /* CONFIG_X86_KERNEL_IBT */
 905
 906#ifdef CONFIG_FINEIBT
 907#define __CFI_DEFAULT	CFI_DEFAULT
 908#elif defined(CONFIG_CFI_CLANG)
 909#define __CFI_DEFAULT	CFI_KCFI
 910#else
 911#define __CFI_DEFAULT	CFI_OFF
 912#endif
 913
 914enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT;
 915
 916#ifdef CONFIG_CFI_CLANG
 917struct bpf_insn;
 918
 919/* Must match bpf_func_t / DEFINE_BPF_PROG_RUN() */
 920extern unsigned int __bpf_prog_runX(const void *ctx,
 921				    const struct bpf_insn *insn);
 922
 923/*
 924 * Force a reference to the external symbol so the compiler generates
 925 * __kcfi_typid.
 926 */
 927__ADDRESSABLE(__bpf_prog_runX);
 928
 929/* u32 __ro_after_init cfi_bpf_hash = __kcfi_typeid___bpf_prog_runX; */
 930asm (
 931"	.pushsection	.data..ro_after_init,\"aw\",@progbits	\n"
 932"	.type	cfi_bpf_hash,@object				\n"
 933"	.globl	cfi_bpf_hash					\n"
 934"	.p2align	2, 0x0					\n"
 935"cfi_bpf_hash:							\n"
 936"	.long	__kcfi_typeid___bpf_prog_runX			\n"
 937"	.size	cfi_bpf_hash, 4					\n"
 938"	.popsection						\n"
 939);
 940
 941/* Must match bpf_callback_t */
 942extern u64 __bpf_callback_fn(u64, u64, u64, u64, u64);
 943
 944__ADDRESSABLE(__bpf_callback_fn);
 945
 946/* u32 __ro_after_init cfi_bpf_subprog_hash = __kcfi_typeid___bpf_callback_fn; */
 947asm (
 948"	.pushsection	.data..ro_after_init,\"aw\",@progbits	\n"
 949"	.type	cfi_bpf_subprog_hash,@object			\n"
 950"	.globl	cfi_bpf_subprog_hash				\n"
 951"	.p2align	2, 0x0					\n"
 952"cfi_bpf_subprog_hash:						\n"
 953"	.long	__kcfi_typeid___bpf_callback_fn			\n"
 954"	.size	cfi_bpf_subprog_hash, 4				\n"
 955"	.popsection						\n"
 956);
 957
 958u32 cfi_get_func_hash(void *func)
 959{
 960	u32 hash;
 961
 962	func -= cfi_get_offset();
 963	switch (cfi_mode) {
 964	case CFI_FINEIBT:
 965		func += 7;
 966		break;
 967	case CFI_KCFI:
 968		func += 1;
 969		break;
 970	default:
 971		return 0;
 972	}
 973
 974	if (get_kernel_nofault(hash, func))
 975		return 0;
 976
 977	return hash;
 978}
 979#endif
 980
 981#ifdef CONFIG_FINEIBT
 982
 983static bool cfi_rand __ro_after_init = true;
 984static u32  cfi_seed __ro_after_init;
 985
 986/*
 987 * Re-hash the CFI hash with a boot-time seed while making sure the result is
 988 * not a valid ENDBR instruction.
 989 */
 990static u32 cfi_rehash(u32 hash)
 991{
 992	hash ^= cfi_seed;
 993	while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
 994		bool lsb = hash & 1;
 995		hash >>= 1;
 996		if (lsb)
 997			hash ^= 0x80200003;
 998	}
 999	return hash;
1000}
1001
1002static __init int cfi_parse_cmdline(char *str)
1003{
1004	if (!str)
1005		return -EINVAL;
1006
1007	while (str) {
1008		char *next = strchr(str, ',');
1009		if (next) {
1010			*next = 0;
1011			next++;
1012		}
1013
1014		if (!strcmp(str, "auto")) {
1015			cfi_mode = CFI_DEFAULT;
1016		} else if (!strcmp(str, "off")) {
1017			cfi_mode = CFI_OFF;
1018			cfi_rand = false;
1019		} else if (!strcmp(str, "kcfi")) {
1020			cfi_mode = CFI_KCFI;
1021		} else if (!strcmp(str, "fineibt")) {
1022			cfi_mode = CFI_FINEIBT;
1023		} else if (!strcmp(str, "norand")) {
1024			cfi_rand = false;
1025		} else {
1026			pr_err("Ignoring unknown cfi option (%s).", str);
1027		}
1028
1029		str = next;
1030	}
1031
1032	return 0;
1033}
1034early_param("cfi", cfi_parse_cmdline);
1035
1036/*
1037 * kCFI						FineIBT
1038 *
1039 * __cfi_\func:					__cfi_\func:
1040 *	movl   $0x12345678,%eax		// 5	     endbr64			// 4
1041 *	nop					     subl   $0x12345678,%r10d   // 7
1042 *	nop					     jz     1f			// 2
1043 *	nop					     ud2			// 2
1044 *	nop					1:   nop			// 1
1045 *	nop
1046 *	nop
1047 *	nop
1048 *	nop
1049 *	nop
1050 *	nop
1051 *	nop
1052 *
1053 *
1054 * caller:					caller:
1055 *	movl	$(-0x12345678),%r10d	 // 6	     movl   $0x12345678,%r10d	// 6
1056 *	addl	$-15(%r11),%r10d	 // 4	     sub    $16,%r11		// 4
1057 *	je	1f			 // 2	     nop4			// 4
1058 *	ud2				 // 2
1059 * 1:	call	__x86_indirect_thunk_r11 // 5	     call   *%r11; nop2;	// 5
1060 *
1061 */
1062
1063asm(	".pushsection .rodata			\n"
1064	"fineibt_preamble_start:		\n"
1065	"	endbr64				\n"
1066	"	subl	$0x12345678, %r10d	\n"
1067	"	je	fineibt_preamble_end	\n"
1068	"	ud2				\n"
1069	"	nop				\n"
1070	"fineibt_preamble_end:			\n"
1071	".popsection\n"
1072);
1073
1074extern u8 fineibt_preamble_start[];
1075extern u8 fineibt_preamble_end[];
1076
1077#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
1078#define fineibt_preamble_hash 7
1079
1080asm(	".pushsection .rodata			\n"
1081	"fineibt_caller_start:			\n"
1082	"	movl	$0x12345678, %r10d	\n"
1083	"	sub	$16, %r11		\n"
1084	ASM_NOP4
1085	"fineibt_caller_end:			\n"
1086	".popsection				\n"
1087);
1088
1089extern u8 fineibt_caller_start[];
1090extern u8 fineibt_caller_end[];
1091
1092#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
1093#define fineibt_caller_hash 2
1094
1095#define fineibt_caller_jmp (fineibt_caller_size - 2)
1096
1097static u32 decode_preamble_hash(void *addr)
1098{
1099	u8 *p = addr;
1100
1101	/* b8 78 56 34 12          mov    $0x12345678,%eax */
1102	if (p[0] == 0xb8)
1103		return *(u32 *)(addr + 1);
1104
1105	return 0; /* invalid hash value */
1106}
1107
1108static u32 decode_caller_hash(void *addr)
1109{
1110	u8 *p = addr;
1111
1112	/* 41 ba 78 56 34 12       mov    $0x12345678,%r10d */
1113	if (p[0] == 0x41 && p[1] == 0xba)
1114		return -*(u32 *)(addr + 2);
1115
1116	/* e8 0c 78 56 34 12	   jmp.d8  +12 */
1117	if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
1118		return -*(u32 *)(addr + 2);
1119
1120	return 0; /* invalid hash value */
1121}
1122
1123/* .retpoline_sites */
1124static int cfi_disable_callers(s32 *start, s32 *end)
1125{
1126	/*
1127	 * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
 1128	 * intact for later usage. Also see decode_caller_hash() and
1129	 * cfi_rewrite_callers().
1130	 */
1131	const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
1132	s32 *s;
1133
1134	for (s = start; s < end; s++) {
1135		void *addr = (void *)s + *s;
1136		u32 hash;
1137
1138		addr -= fineibt_caller_size;
1139		hash = decode_caller_hash(addr);
1140		if (!hash) /* nocfi callers */
1141			continue;
1142
1143		text_poke_early(addr, jmp, 2);
1144	}
1145
1146	return 0;
1147}
1148
1149static int cfi_enable_callers(s32 *start, s32 *end)
1150{
1151	/*
1152	 * Re-enable kCFI, undo what cfi_disable_callers() did.
1153	 */
1154	const u8 mov[] = { 0x41, 0xba };
1155	s32 *s;
1156
1157	for (s = start; s < end; s++) {
1158		void *addr = (void *)s + *s;
1159		u32 hash;
1160
1161		addr -= fineibt_caller_size;
1162		hash = decode_caller_hash(addr);
1163		if (!hash) /* nocfi callers */
1164			continue;
1165
1166		text_poke_early(addr, mov, 2);
1167	}
1168
1169	return 0;
1170}
1171
1172/* .cfi_sites */
1173static int cfi_rand_preamble(s32 *start, s32 *end)
1174{
1175	s32 *s;
1176
1177	for (s = start; s < end; s++) {
1178		void *addr = (void *)s + *s;
1179		u32 hash;
1180
1181		hash = decode_preamble_hash(addr);
1182		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
1183			 addr, addr, 5, addr))
1184			return -EINVAL;
1185
1186		hash = cfi_rehash(hash);
1187		text_poke_early(addr + 1, &hash, 4);
1188	}
1189
1190	return 0;
1191}
1192
1193static int cfi_rewrite_preamble(s32 *start, s32 *end)
1194{
1195	s32 *s;
1196
1197	for (s = start; s < end; s++) {
1198		void *addr = (void *)s + *s;
1199		u32 hash;
1200
1201		hash = decode_preamble_hash(addr);
1202		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
1203			 addr, addr, 5, addr))
1204			return -EINVAL;
1205
1206		text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
1207		WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
1208		text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
1209	}
1210
1211	return 0;
1212}
1213
1214static void cfi_rewrite_endbr(s32 *start, s32 *end)
1215{
1216	s32 *s;
1217
1218	for (s = start; s < end; s++) {
1219		void *addr = (void *)s + *s;
1220
1221		poison_endbr(addr+16, false);
1222	}
1223}
1224
1225/* .retpoline_sites */
1226static int cfi_rand_callers(s32 *start, s32 *end)
1227{
1228	s32 *s;
1229
1230	for (s = start; s < end; s++) {
1231		void *addr = (void *)s + *s;
1232		u32 hash;
1233
1234		addr -= fineibt_caller_size;
1235		hash = decode_caller_hash(addr);
1236		if (hash) {
1237			hash = -cfi_rehash(hash);
1238			text_poke_early(addr + 2, &hash, 4);
1239		}
1240	}
1241
1242	return 0;
1243}
1244
1245static int cfi_rewrite_callers(s32 *start, s32 *end)
1246{
1247	s32 *s;
1248
1249	for (s = start; s < end; s++) {
1250		void *addr = (void *)s + *s;
1251		u32 hash;
1252
1253		addr -= fineibt_caller_size;
1254		hash = decode_caller_hash(addr);
1255		if (hash) {
1256			text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
1257			WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
1258			text_poke_early(addr + fineibt_caller_hash, &hash, 4);
1259		}
1260		/* rely on apply_retpolines() */
1261	}
1262
1263	return 0;
1264}
1265
1266static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
1267			    s32 *start_cfi, s32 *end_cfi, bool builtin)
1268{
1269	int ret;
1270
1271	if (WARN_ONCE(fineibt_preamble_size != 16,
1272		      "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
1273		return;
1274
1275	if (cfi_mode == CFI_DEFAULT) {
1276		cfi_mode = CFI_KCFI;
1277		if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
1278			cfi_mode = CFI_FINEIBT;
1279	}
1280
1281	/*
1282	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
1283	 * rewrite them. This disables all CFI. If this succeeds but any of the
1284	 * later stages fails, we're without CFI.
1285	 */
1286	ret = cfi_disable_callers(start_retpoline, end_retpoline);
1287	if (ret)
1288		goto err;
1289
1290	if (cfi_rand) {
1291		if (builtin) {
1292			cfi_seed = get_random_u32();
1293			cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
1294			cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
1295		}
1296
1297		ret = cfi_rand_preamble(start_cfi, end_cfi);
1298		if (ret)
1299			goto err;
1300
1301		ret = cfi_rand_callers(start_retpoline, end_retpoline);
1302		if (ret)
1303			goto err;
1304	}
1305
1306	switch (cfi_mode) {
1307	case CFI_OFF:
1308		if (builtin)
1309			pr_info("Disabling CFI\n");
1310		return;
1311
1312	case CFI_KCFI:
1313		ret = cfi_enable_callers(start_retpoline, end_retpoline);
1314		if (ret)
1315			goto err;
1316
1317		if (builtin)
1318			pr_info("Using kCFI\n");
1319		return;
1320
1321	case CFI_FINEIBT:
1322		/* place the FineIBT preamble at func()-16 */
1323		ret = cfi_rewrite_preamble(start_cfi, end_cfi);
1324		if (ret)
1325			goto err;
1326
1327		/* rewrite the callers to target func()-16 */
1328		ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
1329		if (ret)
1330			goto err;
1331
1332		/* now that nobody targets func()+0, remove ENDBR there */
1333		cfi_rewrite_endbr(start_cfi, end_cfi);
1334
1335		if (builtin)
1336			pr_info("Using FineIBT CFI\n");
1337		return;
1338
1339	default:
1340		break;
1341	}
1342
1343err:
1344	pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
1345}
1346
1347static inline void poison_hash(void *addr)
1348{
1349	*(u32 *)addr = 0;
1350}
1351
1352static void poison_cfi(void *addr)
1353{
1354	switch (cfi_mode) {
1355	case CFI_FINEIBT:
1356		/*
1357		 * __cfi_\func:
1358		 *	osp nopl (%rax)
1359		 *	subl	$0, %r10d
1360		 *	jz	1f
1361		 *	ud2
1362		 * 1:	nop
1363		 */
1364		poison_endbr(addr, false);
1365		poison_hash(addr + fineibt_preamble_hash);
1366		break;
1367
1368	case CFI_KCFI:
1369		/*
1370		 * __cfi_\func:
1371		 *	movl	$0, %eax
1372		 *	.skip	11, 0x90
1373		 */
1374		poison_hash(addr + 1);
1375		break;
1376
1377	default:
1378		break;
1379	}
1380}
1381
1382#else
1383
1384static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
1385			    s32 *start_cfi, s32 *end_cfi, bool builtin)
1386{
1387}
1388
1389#ifdef CONFIG_X86_KERNEL_IBT
1390static void poison_cfi(void *addr) { }
1391#endif
1392
1393#endif
1394
1395void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
1396		   s32 *start_cfi, s32 *end_cfi)
1397{
1398	return __apply_fineibt(start_retpoline, end_retpoline,
1399			       start_cfi, end_cfi,
1400			       /* .builtin = */ false);
1401}
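
/*
 * This is the module path of the same patching done for the core kernel
 * from alternative_instructions(); with builtin=false the already chosen
 * cfi_mode and cfi_seed are reused, so module preambles and call sites end
 * up matching what the core kernel was given at boot.
 */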
1402
1403#ifdef CONFIG_SMP
1404static void alternatives_smp_lock(const s32 *start, const s32 *end,
1405				  u8 *text, u8 *text_end)
1406{
1407	const s32 *poff;
1408
 
1409	for (poff = start; poff < end; poff++) {
1410		u8 *ptr = (u8 *)poff + *poff;
1411
1412		if (!*poff || ptr < text || ptr >= text_end)
1413			continue;
1414		/* turn DS segment override prefix into lock prefix */
1415		if (*ptr == 0x3e)
1416			text_poke(ptr, ((unsigned char []){0xf0}), 1);
1417	}
 
1418}
1419
1420static void alternatives_smp_unlock(const s32 *start, const s32 *end,
1421				    u8 *text, u8 *text_end)
1422{
1423	const s32 *poff;
1424
1425	for (poff = start; poff < end; poff++) {
1426		u8 *ptr = (u8 *)poff + *poff;
1427
1428		if (!*poff || ptr < text || ptr >= text_end)
1429			continue;
1430		/* turn lock prefix into DS segment override prefix */
1431		if (*ptr == 0xf0)
1432			text_poke(ptr, ((unsigned char []){0x3E}), 1);
1433	}
 
1434}
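
/*
 * The .smp_locks section records the offsets of LOCK prefixes.  On a UP
 * configuration the 0xf0 LOCK byte is rewritten to 0x3e, a DS segment
 * override that is effectively a no-op here, and alternatives_smp_lock()
 * restores the LOCK prefix once a second CPU may come online.
 */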
1435
1436struct smp_alt_module {
1437	/* owning module; NULL for the core kernel */
1438	struct module	*mod;
1439	char		*name;
1440
1441	/* ptrs to lock prefixes */
1442	const s32	*locks;
1443	const s32	*locks_end;
1444
1445	/* .text segment, needed to avoid patching init code ;) */
1446	u8		*text;
1447	u8		*text_end;
1448
1449	struct list_head next;
1450};
1451static LIST_HEAD(smp_alt_modules);
1452static bool uniproc_patched = false;	/* protected by text_mutex */
 
1453
1454void __init_or_module alternatives_smp_module_add(struct module *mod,
1455						  char *name,
1456						  void *locks, void *locks_end,
1457						  void *text,  void *text_end)
1458{
1459	struct smp_alt_module *smp;
1460
1461	mutex_lock(&text_mutex);
1462	if (!uniproc_patched)
1463		goto unlock;
1464
1465	if (num_possible_cpus() == 1)
1466		/* Don't bother remembering, we'll never have to undo it. */
1467		goto smp_unlock;
1468
1469	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
1470	if (!smp)
1471		/* we'll run the (safe but slow) SMP code then ... */
1472		goto unlock;
1473
1474	smp->mod	= mod;
1475	smp->name	= name;
1476	smp->locks	= locks;
1477	smp->locks_end	= locks_end;
1478	smp->text	= text;
1479	smp->text_end	= text_end;
1480	DPRINTK(SMP, "locks %p -> %p, text %p -> %p, name %s\n",
1481		smp->locks, smp->locks_end,
1482		smp->text, smp->text_end, smp->name);
1483
 
1484	list_add_tail(&smp->next, &smp_alt_modules);
1485smp_unlock:
1486	alternatives_smp_unlock(locks, locks_end, text, text_end);
1487unlock:
1488	mutex_unlock(&text_mutex);
1489}
1490
1491void __init_or_module alternatives_smp_module_del(struct module *mod)
1492{
1493	struct smp_alt_module *item;
1494
1495	mutex_lock(&text_mutex);
1496	list_for_each_entry(item, &smp_alt_modules, next) {
1497		if (mod != item->mod)
1498			continue;
1499		list_del(&item->next);
1500		kfree(item);
1501		break;
1502	}
1503	mutex_unlock(&text_mutex);
1504}
1505
1506void alternatives_enable_smp(void)
1507{
1508	struct smp_alt_module *mod;
1509
1510	/* Why bother if there are no other CPUs? */
1511	BUG_ON(num_possible_cpus() == 1);
1512
1513	mutex_lock(&text_mutex);
1514
1515	if (uniproc_patched) {
1516		pr_info("switching to SMP code\n");
1517		BUG_ON(num_online_cpus() != 1);
1518		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
1519		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
1520		list_for_each_entry(mod, &smp_alt_modules, next)
1521			alternatives_smp_lock(mod->locks, mod->locks_end,
1522					      mod->text, mod->text_end);
1523		uniproc_patched = false;
1524	}
1525	mutex_unlock(&text_mutex);
 
1526}
1527
1528/*
1529 * Return 1 if the address range is reserved for SMP-alternatives.
1530 * Must hold text_mutex.
1531 */
1532int alternatives_text_reserved(void *start, void *end)
1533{
1534	struct smp_alt_module *mod;
1535	const s32 *poff;
1536	u8 *text_start = start;
1537	u8 *text_end = end;
1538
1539	lockdep_assert_held(&text_mutex);
1540
1541	list_for_each_entry(mod, &smp_alt_modules, next) {
1542		if (mod->text > text_end || mod->text_end < text_start)
1543			continue;
1544		for (poff = mod->locks; poff < mod->locks_end; poff++) {
1545			const u8 *ptr = (const u8 *)poff + *poff;
1546
1547			if (text_start <= ptr && text_end > ptr)
1548				return 1;
1549		}
1550	}
1551
1552	return 0;
1553}
1554#endif /* CONFIG_SMP */
1555
1556/*
1557 * Self-test for the INT3 based CALL emulation code.
1558 *
1559 * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
1560 * properly and that there is a stack gap between the INT3 frame and the
1561 * previous context. Without this gap doing a virtual PUSH on the interrupted
1562 * stack would corrupt the INT3 IRET frame.
1563 *
1564 * See entry_{32,64}.S for more details.
1565 */
1566
1567/*
1568 * We define the int3_magic() function in assembly to control the calling
1569 * convention such that we can 'call' it from assembly.
1570 */
1571
1572extern void int3_magic(unsigned int *ptr); /* defined in asm */
1573
1574asm (
1575"	.pushsection	.init.text, \"ax\", @progbits\n"
1576"	.type		int3_magic, @function\n"
1577"int3_magic:\n"
1578	ANNOTATE_NOENDBR
1579"	movl	$1, (%" _ASM_ARG1 ")\n"
1580	ASM_RET
1581"	.size		int3_magic, .-int3_magic\n"
1582"	.popsection\n"
1583);
1584
1585extern void int3_selftest_ip(void); /* defined in asm below */
1586
1587static int __init
1588int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
1589{
1590	unsigned long selftest = (unsigned long)&int3_selftest_ip;
1591	struct die_args *args = data;
1592	struct pt_regs *regs = args->regs;
1593
1594	OPTIMIZER_HIDE_VAR(selftest);
 
1595
1596	if (!regs || user_mode(regs))
1597		return NOTIFY_DONE;
1598
1599	if (val != DIE_INT3)
1600		return NOTIFY_DONE;
1601
1602	if (regs->ip - INT3_INSN_SIZE != selftest)
1603		return NOTIFY_DONE;
1604
1605	int3_emulate_call(regs, (unsigned long)&int3_magic);
1606	return NOTIFY_STOP;
1607}
1608
1609/* Must be noinline to ensure uniqueness of int3_selftest_ip. */
1610static noinline void __init int3_selftest(void)
1611{
1612	static __initdata struct notifier_block int3_exception_nb = {
1613		.notifier_call	= int3_exception_notify,
1614		.priority	= INT_MAX-1, /* last */
1615	};
1616	unsigned int val = 0;
1617
1618	BUG_ON(register_die_notifier(&int3_exception_nb));
1619
1620	/*
1621	 * Basically: int3_magic(&val); but really complicated :-)
1622	 *
1623	 * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb
1624	 * notifier above will emulate CALL for us.
1625	 */
1626	asm volatile ("int3_selftest_ip:\n\t"
1627		      ANNOTATE_NOENDBR
1628		      "    int3; nop; nop; nop; nop\n\t"
1629		      : ASM_CALL_CONSTRAINT
1630		      : __ASM_SEL_RAW(a, D) (&val)
1631		      : "memory");
1632
1633	BUG_ON(val != 1);
1634
1635	unregister_die_notifier(&int3_exception_nb);
1636}
1637
1638static __initdata int __alt_reloc_selftest_addr;
1639
1640extern void __init __alt_reloc_selftest(void *arg);
1641__visible noinline void __init __alt_reloc_selftest(void *arg)
1642{
1643	WARN_ON(arg != &__alt_reloc_selftest_addr);
1644}
1645
1646static noinline void __init alt_reloc_selftest(void)
1647{
1648	/*
1649	 * Tests apply_relocation().
1650	 *
1651	 * This has a relative immediate (CALL) in a place other than the first
1652	 * instruction and additionally on x86_64 we get a RIP-relative LEA:
1653	 *
1654	 *   lea    0x0(%rip),%rdi  # 5d0: R_X86_64_PC32    .init.data+0x5566c
1655	 *   call   +0              # 5d5: R_X86_64_PLT32   __alt_reloc_selftest-0x4
1656	 *
1657	 * Getting this wrong will either crash and burn or tickle the WARN
1658	 * above.
1659	 */
1660	asm_inline volatile (
1661		ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
1662		: /* output */
1663		: [mem] "m" (__alt_reloc_selftest_addr)
1664		: _ASM_ARG1
1665	);
1666}
1667
1668void __init alternative_instructions(void)
1669{
1670	int3_selftest();
1671
1672	/*
1673	 * The patching is not fully atomic, so try to avoid local
1674	 * interruptions that might execute the to-be-patched code.
1675	 * Other CPUs are not running.
1676	 */
1677	stop_nmi();
1678
1679	/*
1680	 * Don't stop machine check exceptions while patching.
1681	 * MCEs only happen when something got corrupted and in this
1682	 * case we must do something about the corruption.
1683	 * Ignoring it is worse than an unlikely patching race.
1684	 * Also machine checks tend to be broadcast and if one CPU
1685	 * goes into machine check the others follow quickly, so we don't
1686	 * expect a machine check to cause undue problems during code
1687	 * patching.
1688	 */
1689
1690	/*
1691	 * Make sure to set (artificial) features depending on used paravirt
1692	 * functions which can later influence alternative patching.
1693	 */
1694	paravirt_set_cap();
1695
1696	__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
1697			__cfi_sites, __cfi_sites_end, true);
1698
1699	/*
1700	 * Rewrite the retpolines, must be done before alternatives since
1701	 * those can rewrite the retpoline thunks.
1702	 */
1703	apply_retpolines(__retpoline_sites, __retpoline_sites_end);
1704	apply_returns(__return_sites, __return_sites_end);
1705
1706	apply_alternatives(__alt_instructions, __alt_instructions_end);
1707
1708	/*
1709	 * Now all calls are established. Apply the call thunks if
1710	 * required.
1711	 */
1712	callthunks_patch_builtin_calls();
1713
1714	/*
1715	 * Seal all functions that do not have their address taken.
1716	 */
1717	apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
1718
1719#ifdef CONFIG_SMP
1720	/* Patch to UP if other CPUs are not imminent. */
1721	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
1722		uniproc_patched = true;
1723		alternatives_smp_module_add(NULL, "core kernel",
1724					    __smp_locks, __smp_locks_end,
1725					    _text, _etext);
1726	}
1727
1728	if (!uniproc_patched || num_possible_cpus() == 1) {
1729		free_init_pages("SMP alternatives",
1730				(unsigned long)__smp_locks,
1731				(unsigned long)__smp_locks_end);
1732	}
1733#endif
1734
1735	restart_nmi();
1736	alternatives_patched = 1;
1737
1738	alt_reloc_selftest();
1739}
1740
1741/**
1742 * text_poke_early - Update instructions on a live kernel at boot time
1743 * @addr: address to modify
1744 * @opcode: source of the copy
1745 * @len: length to copy
1746 *
1747 * When you use this code to patch more than one byte of an instruction
1748 * you need to make sure that other CPUs cannot execute this code in parallel.
1749 * Also no thread must be currently preempted in the middle of these
1750 * instructions. And on the local CPU you need to be protected against NMI or
1751 * MCE handlers seeing an inconsistent instruction while you patch.
1752 */
1753void __init_or_module text_poke_early(void *addr, const void *opcode,
1754				      size_t len)
1755{
1756	unsigned long flags;
1757
1758	if (boot_cpu_has(X86_FEATURE_NX) &&
1759	    is_module_text_address((unsigned long)addr)) {
1760		/*
1761		 * Modules text is marked initially as non-executable, so the
1762		 * code cannot be running and speculative code-fetches are
1763		 * prevented. Just change the code.
1764		 */
1765		memcpy(addr, opcode, len);
1766	} else {
1767		local_irq_save(flags);
1768		memcpy(addr, opcode, len);
1769		sync_core();
1770		local_irq_restore(flags);
1771
1772		/*
1773		 * Could also do a CLFLUSH here to speed up CPU recovery; but
1774		 * that causes hangs on some VIA CPUs.
1775		 */
1776	}
1777}
1778
1779typedef struct {
1780	struct mm_struct *mm;
1781} temp_mm_state_t;
1782
1783/*
1784 * Using a temporary mm allows setting temporary mappings that are not accessible
1785 * by other CPUs. Such mappings are needed to perform sensitive memory writes
1786 * that override the kernel memory protections (e.g., W^X), without exposing the
1787 * temporary page-table mappings that are required for these write operations to
1788 * other CPUs. Using a temporary mm also avoids TLB shootdowns when the
1789 * mapping is torn down.
1790 *
1791 * Context: The temporary mm needs to be used exclusively by a single core. To
1792 *          harden security IRQs must be disabled while the temporary mm is
1793 *          loaded, thereby preventing interrupt handler bugs from overriding
1794 *          the kernel memory protection.
1795 */
1796static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
1797{
1798	temp_mm_state_t temp_state;
1799
1800	lockdep_assert_irqs_disabled();
1801
1802	/*
1803	 * Make sure not to be in TLB lazy mode, as otherwise we'll end up
1804	 * with a stale address space WITHOUT being in lazy mode after
1805	 * restoring the previous mm.
1806	 */
1807	if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
1808		leave_mm(smp_processor_id());
1809
1810	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
1811	switch_mm_irqs_off(NULL, mm, current);
1812
1813	/*
1814	 * If breakpoints are enabled, disable them while the temporary mm is
1815	 * used. Userspace might set up watchpoints on addresses that are used
1816	 * in the temporary mm, which would lead to wrong signals being sent or
1817	 * crashes.
1818	 *
1819	 * Note that breakpoints are not disabled selectively, which also causes
1820	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
1821	 * undesirable, but still seems reasonable as the code that runs in the
1822	 * temporary mm should be short.
1823	 */
1824	if (hw_breakpoint_active())
1825		hw_breakpoint_disable();
1826
1827	return temp_state;
1828}
1829
1830static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
1831{
1832	lockdep_assert_irqs_disabled();
1833	switch_mm_irqs_off(NULL, prev_state.mm, current);
1834
1835	/*
1836	 * Restore the breakpoints if they were disabled before the temporary mm
1837	 * was loaded.
1838	 */
1839	if (hw_breakpoint_active())
1840		hw_breakpoint_restore();
1841}
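
/*
 * Typical pairing, as used by __text_poke() below, with IRQs disabled
 * across the whole sequence:
 *
 *	prev = use_temporary_mm(poking_mm);
 *	... write through the poking_addr mapping ...
 *	unuse_temporary_mm(prev);
 */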
1842
1843__ro_after_init struct mm_struct *poking_mm;
1844__ro_after_init unsigned long poking_addr;
1845
1846static void text_poke_memcpy(void *dst, const void *src, size_t len)
1847{
1848	memcpy(dst, src, len);
1849}
1850
1851static void text_poke_memset(void *dst, const void *src, size_t len)
1852{
1853	int c = *(const int *)src;
1854
1855	memset(dst, c, len);
1856}
1857
1858typedef void text_poke_f(void *dst, const void *src, size_t len);
1859
1860static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
1861{
1862	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
1863	struct page *pages[2] = {NULL};
1864	temp_mm_state_t prev;
1865	unsigned long flags;
1866	pte_t pte, *ptep;
1867	spinlock_t *ptl;
1868	pgprot_t pgprot;
1869
1870	/*
1871	 * While boot memory allocator is running we cannot use struct pages as
1872	 * they are not yet initialized. There is no way to recover.
1873	 */
1874	BUG_ON(!after_bootmem);
1875
1876	if (!core_kernel_text((unsigned long)addr)) {
1877		pages[0] = vmalloc_to_page(addr);
1878		if (cross_page_boundary)
1879			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
1880	} else {
1881		pages[0] = virt_to_page(addr);
1882		WARN_ON(!PageReserved(pages[0]));
1883		if (cross_page_boundary)
1884			pages[1] = virt_to_page(addr + PAGE_SIZE);
1885	}
1886	/*
1887	 * If something went wrong, crash and burn since recovery paths are not
1888	 * implemented.
1889	 */
1890	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
1891
1892	/*
1893	 * Map the page without the global bit, as TLB flushing is done with
1894	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
1895	 */
1896	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
1897
1898	/*
1899	 * The lock is not really needed, but taking it avoids open-coding.
1900	 */
1901	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
1902
1903	/*
1904	 * This must not fail; preallocated in poking_init().
1905	 */
1906	VM_BUG_ON(!ptep);
1907
1908	local_irq_save(flags);
1909
1910	pte = mk_pte(pages[0], pgprot);
1911	set_pte_at(poking_mm, poking_addr, ptep, pte);
1912
1913	if (cross_page_boundary) {
1914		pte = mk_pte(pages[1], pgprot);
1915		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
1916	}
1917
1918	/*
1919	 * Loading the temporary mm behaves as a compiler barrier, which
1920	 * guarantees that the PTE will be set at the time memcpy() is done.
1921	 */
1922	prev = use_temporary_mm(poking_mm);
1923
1924	kasan_disable_current();
1925	func((u8 *)poking_addr + offset_in_page(addr), src, len);
1926	kasan_enable_current();
1927
1928	/*
1929	 * Ensure that the PTE is only cleared after the instructions of memcpy
1930	 * were issued by using a compiler barrier.
1931	 */
1932	barrier();
1933
1934	pte_clear(poking_mm, poking_addr, ptep);
1935	if (cross_page_boundary)
1936		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
1937
1938	/*
1939	 * Loading the previous page-table hierarchy requires a serializing
1940	 * instruction that already allows the core to see the updated version.
1941	 * Xen-PV is assumed to serialize execution in a similar manner.
1942	 */
1943	unuse_temporary_mm(prev);
1944
1945	/*
1946	 * Flushing the TLB might involve IPIs, which would require enabled
1947	 * IRQs, but not when the mm is unused, as is the case at this point.
1948	 */
1949	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
1950			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
1951			   PAGE_SHIFT, false);
1952
1953	if (func == text_poke_memcpy) {
1954		/*
1955		 * If the text does not match what we just wrote then something is
1956		 * fundamentally screwy; there's nothing we can really do about that.
1957		 */
1958		BUG_ON(memcmp(addr, src, len));
1959	}
1960
1961	local_irq_restore(flags);
1962	pte_unmap_unlock(ptep, ptl);
 
1963	return addr;
1964}
1965
1966/**
1967 * text_poke - Update instructions on a live kernel
1968 * @addr: address to modify
1969 * @opcode: source of the copy
1970 * @len: length to copy
1971 *
1972 * Only atomic text poke/set should be allowed when not doing early patching.
1973 * It means the size must be writable atomically and the address must be aligned
1974 * in a way that permits an atomic write. It also makes sure we fit on a single
1975 * page.
1976 *
1977 * Note that the caller must ensure that if the modified code is part of a
1978 * module, the module would not be removed during poking. This can be achieved
1979 * by registering a module notifier, and ordering module removal and patching
1980 * through a mutex.
1981 */
1982void *text_poke(void *addr, const void *opcode, size_t len)
1983{
1984	lockdep_assert_held(&text_mutex);
1985
1986	return __text_poke(text_poke_memcpy, addr, opcode, len);
1987}
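
/*
 * Note that text_poke() by itself does not make the write safe against
 * concurrent execution of the patched bytes; callers must otherwise
 * guarantee nothing can be executing them (e.g. alternatives_enable_smp()
 * runs with a single CPU online), or go through the INT3 based
 * text_poke_bp*() machinery below, which in turn uses text_poke() for the
 * individual byte writes.
 */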
1988
1989/**
1990 * text_poke_kgdb - Update instructions on a live kernel by kgdb
1991 * @addr: address to modify
1992 * @opcode: source of the copy
1993 * @len: length to copy
1994 *
1995 * Only atomic text poke/set should be allowed when not doing early patching.
1996 * It means the size must be writable atomically and the address must be aligned
1997 * in a way that permits an atomic write. It also makes sure we fit on a single
1998 * page.
1999 *
2000 * Context: should only be used by kgdb, which ensures no other core is running,
2001 *	    despite the fact it does not hold the text_mutex.
2002 */
2003void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
2004{
2005	return __text_poke(text_poke_memcpy, addr, opcode, len);
2006}
2007
2008void *text_poke_copy_locked(void *addr, const void *opcode, size_t len,
2009			    bool core_ok)
2010{
2011	unsigned long start = (unsigned long)addr;
2012	size_t patched = 0;
2013
2014	if (WARN_ON_ONCE(!core_ok && core_kernel_text(start)))
2015		return NULL;
2016
2017	while (patched < len) {
2018		unsigned long ptr = start + patched;
2019		size_t s;
2020
2021		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
2022
2023		__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
2024		patched += s;
2025	}
2026	return addr;
2027}
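
/*
 * __text_poke() can map at most two pages at a time, hence the chunking
 * above: each iteration writes at most up to the end of the page pair
 * containing ptr, i.e. PAGE_SIZE * 2 - offset_in_page(ptr) bytes.
 */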
2028
2029/**
2030 * text_poke_copy - Copy instructions into (an unused part of) RX memory
2031 * @addr: address to modify
2032 * @opcode: source of the copy
2033 * @len: length to copy, could be more than 2x PAGE_SIZE
2034 *
2035 * Not safe against concurrent execution; useful for JITs to dump
2036 * new code blocks into unused regions of RX memory. Can be used in
2037 * conjunction with synchronize_rcu_tasks() to wait for existing
2038 * execution to quiesce after having made sure no existing function
2039 * pointers are live.
2040 */
2041void *text_poke_copy(void *addr, const void *opcode, size_t len)
2042{
2043	mutex_lock(&text_mutex);
2044	addr = text_poke_copy_locked(addr, opcode, len, false);
2045	mutex_unlock(&text_mutex);
2046	return addr;
2047}
2048
2049/**
2050 * text_poke_set - memset into (an unused part of) RX memory
2051 * @addr: address to modify
2052 * @c: the byte to fill the area with
2053 * @len: length to copy, could be more than 2x PAGE_SIZE
2054 *
2055 * This is useful to overwrite unused regions of RX memory with illegal
2056 * instructions.
2057 */
2058void *text_poke_set(void *addr, int c, size_t len)
2059{
2060	unsigned long start = (unsigned long)addr;
2061	size_t patched = 0;
2062
2063	if (WARN_ON_ONCE(core_kernel_text(start)))
2064		return NULL;
2065
2066	mutex_lock(&text_mutex);
2067	while (patched < len) {
2068		unsigned long ptr = start + patched;
2069		size_t s;
2070
2071		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
2072
2073		__text_poke(text_poke_memset, (void *)ptr, (void *)&c, s);
2074		patched += s;
2075	}
2076	mutex_unlock(&text_mutex);
2077	return addr;
2078}
2079
2080static void do_sync_core(void *info)
2081{
2082	sync_core();
2083}
2084
2085void text_poke_sync(void)
2086{
2087	on_each_cpu(do_sync_core, NULL, 1);
2088}
2089
2090/*
2091 * NOTE: scheme to allow patching Jcc.d32 without increasing the size of
2092 * struct text_poke_loc. When len == 6 everything is prefixed with 0x0f and we map
2093 * opcode to Jcc.d8, using len to distinguish.
2094 */
2095struct text_poke_loc {
2096	/* addr := _stext + rel_addr */
2097	s32 rel_addr;
2098	s32 disp;
2099	u8 len;
2100	u8 opcode;
2101	const u8 text[POKE_MAX_OPCODE_SIZE];
2102	/* see text_poke_bp_batch() */
2103	u8 old;
2104};
2105
2106struct bp_patching_desc {
2107	struct text_poke_loc *vec;
2108	int nr_entries;
2109	atomic_t refs;
2110};
2111
2112static struct bp_patching_desc bp_desc;
2113
2114static __always_inline
2115struct bp_patching_desc *try_get_desc(void)
2116{
2117	struct bp_patching_desc *desc = &bp_desc;
2118
2119	if (!raw_atomic_inc_not_zero(&desc->refs))
2120		return NULL;
2121
2122	return desc;
2123}
2124
2125static __always_inline void put_desc(void)
2126{
2127	struct bp_patching_desc *desc = &bp_desc;
2128
2129	smp_mb__before_atomic();
2130	raw_atomic_dec(&desc->refs);
2131}
2132
2133static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
2134{
2135	return _stext + tp->rel_addr;
2136}
2137
2138static __always_inline int patch_cmp(const void *key, const void *elt)
2139{
2140	struct text_poke_loc *tp = (struct text_poke_loc *) elt;
2141
2142	if (key < text_poke_addr(tp))
2143		return -1;
2144	if (key > text_poke_addr(tp))
2145		return 1;
2146	return 0;
2147}
2148
2149noinstr int poke_int3_handler(struct pt_regs *regs)
2150{
2151	struct bp_patching_desc *desc;
2152	struct text_poke_loc *tp;
2153	int ret = 0;
2154	void *ip;
2155
2156	if (user_mode(regs))
2157		return 0;
2158
2159	/*
2160	 * Having observed our INT3 instruction, we now must observe
2161	 * bp_desc with non-zero refcount:
2162	 *
2163	 *	bp_desc.refs = 1		INT3
2164	 *	WMB				RMB
2165	 *	write INT3			if (bp_desc.refs != 0)
2166	 */
2167	smp_rmb();
2168
2169	desc = try_get_desc();
2170	if (!desc)
2171		return 0;
2172
2173	/*
2174	 * Discount the INT3. See text_poke_bp_batch().
2175	 */
2176	ip = (void *) regs->ip - INT3_INSN_SIZE;
2177
2178	/*
2179	 * Skip the binary search if there is a single member in the vector.
2180	 */
2181	if (unlikely(desc->nr_entries > 1)) {
2182		tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
2183				      sizeof(struct text_poke_loc),
2184				      patch_cmp);
2185		if (!tp)
2186			goto out_put;
2187	} else {
2188		tp = desc->vec;
2189		if (text_poke_addr(tp) != ip)
2190			goto out_put;
2191	}
2192
2193	ip += tp->len;
2194
2195	switch (tp->opcode) {
2196	case INT3_INSN_OPCODE:
2197		/*
2198		 * Someone poked an explicit INT3, they'll want to handle it,
2199		 * do not consume.
2200		 */
2201		goto out_put;
2202
2203	case RET_INSN_OPCODE:
2204		int3_emulate_ret(regs);
2205		break;
2206
2207	case CALL_INSN_OPCODE:
2208		int3_emulate_call(regs, (long)ip + tp->disp);
2209		break;
2210
2211	case JMP32_INSN_OPCODE:
2212	case JMP8_INSN_OPCODE:
2213		int3_emulate_jmp(regs, (long)ip + tp->disp);
2214		break;
2215
2216	case 0x70 ... 0x7f: /* Jcc */
2217		int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
2218		break;
2219
2220	default:
2221		BUG();
2222	}
2223
2224	ret = 1;
2225
2226out_put:
2227	put_desc();
2228	return ret;
 
2229}
2230
2231#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
2232static struct text_poke_loc tp_vec[TP_VEC_MAX];
2233static int tp_vec_nr;
2234
2235/**
2236 * text_poke_bp_batch() -- update instructions on live kernel on SMP
2237 * @tp:			vector of instructions to patch
2238 * @nr_entries:		number of entries in the vector
2239 *
2240 * Modify multi-byte instruction by using int3 breakpoint on SMP.
2241 * We completely avoid stop_machine() here, and achieve the
2242 * synchronization using int3 breakpoint.
2243 *
2244 * The way it is done:
2245 *	- For each entry in the vector:
2246 *		- add a int3 trap to the address that will be patched
2247 *	- sync cores
2248 *	- For each entry in the vector:
2249 *		- update all but the first byte of the patched range
2250 *	- sync cores
2251 *	- For each entry in the vector:
2252 *		- replace the first byte (int3) by the first byte of
2253 *		  replacing opcode
2254 *	- sync cores
2255 */
2256static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
2257{
2258	unsigned char int3 = INT3_INSN_OPCODE;
2259	unsigned int i;
2260	int do_sync;
2261
2262	lockdep_assert_held(&text_mutex);
2263
2264	bp_desc.vec = tp;
2265	bp_desc.nr_entries = nr_entries;
2266
2267	/*
2268	 * Corresponds to the implicit memory barrier in try_get_desc() to
2269	 * ensure reading a non-zero refcount provides up to date bp_desc data.
2270	 */
2271	atomic_set_release(&bp_desc.refs, 1);
2272
2273	/*
2274	 * Function tracing can enable thousands of places that need to be
2275	 * updated. This can take quite some time, and with full kernel debugging
2276	 * enabled, this could cause the softlockup watchdog to trigger.
2277	 * This function gets called every 256 entries added to be patched.
2278	 * Call cond_resched() here to make sure that other tasks can get scheduled
2279	 * while processing all the functions being patched.
2280	 */
2281	cond_resched();
2282
2283	/*
2284	 * The corresponding read barrier is in the INT3 notifier, making sure
2285	 * nr_entries and the handler are correctly ordered w.r.t. the patching.
2286	 */
2287	smp_wmb();
2288
2289	/*
2290	 * First step: add a int3 trap to the address that will be patched.
2291	 */
2292	for (i = 0; i < nr_entries; i++) {
2293		tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
2294		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
2295	}
2296
2297	text_poke_sync();
2298
2299	/*
2300	 * Second step: update all but the first byte of the patched range.
2301	 */
2302	for (do_sync = 0, i = 0; i < nr_entries; i++) {
2303		u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
2304		u8 _new[POKE_MAX_OPCODE_SIZE+1];
2305		const u8 *new = tp[i].text;
2306		int len = tp[i].len;
2307
2308		if (len - INT3_INSN_SIZE > 0) {
2309			memcpy(old + INT3_INSN_SIZE,
2310			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
2311			       len - INT3_INSN_SIZE);
2312
2313			if (len == 6) {
2314				_new[0] = 0x0f;
2315				memcpy(_new + 1, new, 5);
2316				new = _new;
2317			}
2318
2319			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
2320				  new + INT3_INSN_SIZE,
2321				  len - INT3_INSN_SIZE);
2322
2323			do_sync++;
2324		}
2325
2326		/*
2327		 * Emit a perf event to record the text poke, primarily to
2328		 * support Intel PT decoding which must walk the executable code
2329		 * to reconstruct the trace. The flow up to here is:
2330		 *   - write INT3 byte
2331		 *   - IPI-SYNC
2332		 *   - write instruction tail
2333		 * At this point the actual control flow will be through the
2334		 * INT3 and handler and not hit the old or new instruction.
2335		 * Intel PT outputs FUP/TIP packets for the INT3, so the flow
2336		 * can still be decoded. Subsequently:
2337		 *   - emit RECORD_TEXT_POKE with the new instruction
2338		 *   - IPI-SYNC
2339		 *   - write first byte
2340		 *   - IPI-SYNC
2341		 * So before the text poke event timestamp, the decoder will see
2342		 * either the old instruction flow or FUP/TIP of INT3. After the
2343		 * text poke event timestamp, the decoder will see either the
2344		 * new instruction flow or FUP/TIP of INT3. Thus decoders can
2345		 * use the timestamp as the point at which to modify the
2346		 * executable code.
2347		 * The old instruction is recorded so that the event can be
2348		 * processed forwards or backwards.
2349		 */
2350		perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
2351	}
2352
2353	if (do_sync) {
2354		/*
2355		 * According to Intel, this core syncing is very likely
2356		 * not necessary and we'd be safe even without it. But
2357		 * better safe than sorry (plus there's not only Intel).
2358		 */
2359		text_poke_sync();
2360	}
2361
2362	/*
2363	 * Third step: replace the first byte (int3) by the first byte of
2364	 * replacing opcode.
2365	 */
2366	for (do_sync = 0, i = 0; i < nr_entries; i++) {
2367		u8 byte = tp[i].text[0];
2368
2369		if (tp[i].len == 6)
2370			byte = 0x0f;
2371
2372		if (byte == INT3_INSN_OPCODE)
2373			continue;
2374
2375		text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
2376		do_sync++;
2377	}
2378
2379	if (do_sync)
2380		text_poke_sync();
2381
2382	/*
2383	 * Remove and wait for refs to be zero.
2384	 */
2385	if (!atomic_dec_and_test(&bp_desc.refs))
2386		atomic_cond_read_acquire(&bp_desc.refs, !VAL);
2387}
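
/*
 * The INT3 byte written in the first step is what makes this safe: any CPU
 * that executes the location while it is being rewritten traps into
 * poke_int3_handler(), which emulates the new instruction recorded in the
 * corresponding text_poke_loc, so no CPU ever executes a half-written
 * instruction.
 */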
2388
2389static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
2390			       const void *opcode, size_t len, const void *emulate)
2391{
2392	struct insn insn;
2393	int ret, i = 0;
2394
2395	if (len == 6)
2396		i = 1;
2397	memcpy((void *)tp->text, opcode+i, len-i);
2398	if (!emulate)
2399		emulate = opcode;
2400
2401	ret = insn_decode_kernel(&insn, emulate);
2402	BUG_ON(ret < 0);
2403
2404	tp->rel_addr = addr - (void *)_stext;
2405	tp->len = len;
2406	tp->opcode = insn.opcode.bytes[0];
2407
2408	if (is_jcc32(&insn)) {
2409		/*
2410		 * Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
2411		 */
2412		tp->opcode = insn.opcode.bytes[1] - 0x10;
2413	}
2414
2415	switch (tp->opcode) {
2416	case RET_INSN_OPCODE:
2417	case JMP32_INSN_OPCODE:
2418	case JMP8_INSN_OPCODE:
2419		/*
2420		 * Control flow instructions without implied execution of the
2421		 * next instruction can be padded with INT3.
2422		 */
2423		for (i = insn.length; i < len; i++)
2424			BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
2425		break;
2426
2427	default:
2428		BUG_ON(len != insn.length);
2429	}
2430
2431	switch (tp->opcode) {
2432	case INT3_INSN_OPCODE:
2433	case RET_INSN_OPCODE:
2434		break;
2435
2436	case CALL_INSN_OPCODE:
2437	case JMP32_INSN_OPCODE:
2438	case JMP8_INSN_OPCODE:
2439	case 0x70 ... 0x7f: /* Jcc */
2440		tp->disp = insn.immediate.value;
2441		break;
2442
2443	default: /* assume NOP */
2444		switch (len) {
2445		case 2: /* NOP2 -- emulate as JMP8+0 */
2446			BUG_ON(memcmp(emulate, x86_nops[len], len));
2447			tp->opcode = JMP8_INSN_OPCODE;
2448			tp->disp = 0;
2449			break;
2450
2451		case 5: /* NOP5 -- emulate as JMP32+0 */
2452			BUG_ON(memcmp(emulate, x86_nops[len], len));
2453			tp->opcode = JMP32_INSN_OPCODE;
2454			tp->disp = 0;
2455			break;
2456
2457		default: /* unknown instruction */
2458			BUG();
2459		}
2460		break;
2461	}
2462}
2463
2464/*
2465 * We rely on tp_vec being ordered; ensure this is so by flushing
2466 * early if needed.
2467 */
2468static bool tp_order_fail(void *addr)
2469{
2470	struct text_poke_loc *tp;
2471
2472	if (!tp_vec_nr)
2473		return false;
2474
2475	if (!addr) /* force */
2476		return true;
2477
2478	tp = &tp_vec[tp_vec_nr - 1];
2479	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
2480		return true;
2481
2482	return false;
2483}
2484
2485static void text_poke_flush(void *addr)
2486{
2487	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
2488		text_poke_bp_batch(tp_vec, tp_vec_nr);
2489		tp_vec_nr = 0;
2490	}
2491}
2492
2493void text_poke_finish(void)
2494{
2495	text_poke_flush(NULL);
2496}
2497
2498void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
2499{
2500	struct text_poke_loc *tp;
2501
2502	text_poke_flush(addr);
2503
2504	tp = &tp_vec[tp_vec_nr++];
2505	text_poke_loc_init(tp, addr, opcode, len, emulate);
2506}
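
/*
 * Illustrative batching pattern (this is roughly how the function-tracing
 * updates mentioned in text_poke_bp_batch() use the queue, with text_mutex
 * held):
 *
 *	text_poke_queue(addr1, insn1, len1, NULL);
 *	text_poke_queue(addr2, insn2, len2, NULL);
 *	...
 *	text_poke_finish();
 *
 * Entries are expected in ascending address order; text_poke_flush() sends
 * the pending vector out early if an entry arrives out of order or the
 * vector is full.
 */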
2507
2508/**
2509 * text_poke_bp() -- update instructions on live kernel on SMP
2510 * @addr:	address to patch
2511 * @opcode:	opcode of new instruction
2512 * @len:	length to copy
2513 * @emulate:	instruction to be emulated
2514 *
2515 * Update a single instruction with the vector in the stack, avoiding
2516 * dynamically allocated memory. This function should be used when it is
2517 * not possible to allocate memory.
2518 */
2519void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
2520{
2521	struct text_poke_loc tp;
2522
2523	text_poke_loc_init(&tp, addr, opcode, len, emulate);
2524	text_poke_bp_batch(&tp, 1);
2525}
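
/*
 * Illustrative use (addr and dest are hypothetical here): turning a 5-byte
 * NOP into a direct call while other CPUs may be executing it, with
 * text_mutex held:
 *
 *	u8 insn[5];
 *
 *	insn[0] = CALL_INSN_OPCODE;
 *	*(s32 *)&insn[1] = (s32)(dest - (addr + 5));	// rel32 to the target
 *	text_poke_bp(addr, insn, 5, NULL);		// NULL: emulate the bytes in insn
 */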