v6.2
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 *  linux/arch/arm/boot/compressed/head.S
   4 *
   5 *  Copyright (C) 1996-2002 Russell King
   6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
   7 */
   8#include <linux/linkage.h>
   9#include <asm/assembler.h>
  10#include <asm/v7m.h>
  11
  12#include "efi-header.S"
  13
  14#ifdef __ARMEB__
  15#define OF_DT_MAGIC 0xd00dfeed
  16#else
  17#define OF_DT_MAGIC 0xedfe0dd0
  18#endif
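/*
 * The FDT header stores its magic value big-endian as 0xd00dfeed, so a
 * raw word load on a little-endian build sees the byte-reversed value
 * 0xedfe0dd0.  Defining the constant per endianness lets the DTB checks
 * below compare the word straight from memory without swapping it first.
 */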
  19
  20 AR_CLASS(	.arch	armv7-a	)
  21 M_CLASS(	.arch	armv7-m	)
  22
  23/*
  24 * Debugging stuff
  25 *
  26 * Note that these macros must not contain any code which is not
  27 * 100% relocatable.  Any attempt to do so will result in a crash.
  28 * Please select one of the following when turning on debugging.
  29 */
  30#ifdef DEBUG
  31
  32#if defined(CONFIG_DEBUG_ICEDCC)
  33
  34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
  35		.macro	loadsp, rb, tmp1, tmp2
  36		.endm
  37		.macro	writeb, ch, rb, tmp
  38		mcr	p14, 0, \ch, c0, c5, 0
  39		.endm
  40#elif defined(CONFIG_CPU_XSCALE)
  41		.macro	loadsp, rb, tmp1, tmp2
  42		.endm
  43		.macro	writeb, ch, rb, tmp
  44		mcr	p14, 0, \ch, c8, c0, 0
  45		.endm
  46#else
  47		.macro	loadsp, rb, tmp1, tmp2
  48		.endm
  49		.macro	writeb, ch, rb, tmp
  50		mcr	p14, 0, \ch, c1, c0, 0
  51		.endm
  52#endif
  53
  54#else
  55
  56#include CONFIG_DEBUG_LL_INCLUDE
  57
  58		.macro	writeb,	ch, rb, tmp
  59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
  60		waituartcts \tmp, \rb
  61#endif
  62		waituarttxrdy \tmp, \rb
  63		senduart \ch, \rb
  64		busyuart \tmp, \rb
  65		.endm
  66
  67#if defined(CONFIG_ARCH_SA1100)
  68		.macro	loadsp, rb, tmp1, tmp2
  69		mov	\rb, #0x80000000	@ physical base address
  70		add	\rb, \rb, #0x00010000	@ Ser1
  71		.endm
  72#else
  73		.macro	loadsp,	rb, tmp1, tmp2
  74		addruart \rb, \tmp1, \tmp2
  75		.endm
  76#endif
  77#endif
  78#endif
  79
  80		.macro	kputc,val
  81		mov	r0, \val
  82		bl	putc
  83		.endm
  84
  85		.macro	kphex,val,len
  86		mov	r0, \val
  87		mov	r1, #\len
  88		bl	phex
  89		.endm
  90
  91		/*
  92		 * Debug kernel copy by printing the memory addresses involved
  93		 */
  94		.macro dbgkc, begin, end, cbegin, cend
  95#ifdef DEBUG
  96		kputc   #'C'
  97		kputc   #':'
  98		kputc   #'0'
  99		kputc   #'x'
 100		kphex   \begin, 8	/* Start of compressed kernel */
 101		kputc	#'-'
 102		kputc	#'0'
 103		kputc	#'x'
 104		kphex	\end, 8		/* End of compressed kernel */
 105		kputc	#'-'
 106		kputc	#'>'
 107		kputc   #'0'
 108		kputc   #'x'
 109		kphex   \cbegin, 8	/* Start of kernel copy */
 110		kputc	#'-'
 111		kputc	#'0'
 112		kputc	#'x'
 113		kphex	\cend, 8	/* End of kernel copy */
 114		kputc	#'\n'
 115#endif
 116		.endm
 117
 118		/*
 119		 * Debug print of the final appended DTB location
 120		 */
 121		.macro dbgadtb, begin, size
 122#ifdef DEBUG
 123		kputc   #'D'
 124		kputc   #'T'
 125		kputc   #'B'
 126		kputc   #':'
 127		kputc   #'0'
 128		kputc   #'x'
 129		kphex   \begin, 8	/* Start of appended DTB */
 130		kputc	#' '
 131		kputc	#'('
 132		kputc	#'0'
 133		kputc	#'x'
 134		kphex	\size, 8	/* Size of appended DTB */
 135		kputc	#')'
 136		kputc	#'\n'
 137#endif
 138		.endm
 139
 140		.macro	enable_cp15_barriers, reg
 141		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
 142		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
 143		bne	.L_\@
 144		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
 145		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 146 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
 147 THUMB(		isb						)
 148.L_\@:
 149		.endm
 150
 151		/*
 152		 * The kernel build system appends the size of the
 153		 * decompressed kernel at the end of the compressed data
 154		 * in little-endian form.
 155		 */
 156		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
 157		adr	\res, .Linflated_image_size_offset
 158		ldr	\tmp1, [\res]
 159		add	\tmp1, \tmp1, \res	@ address of inflated image size
 160
 161		ldrb	\res, [\tmp1]		@ get_unaligned_le32
 162		ldrb	\tmp2, [\tmp1, #1]
 163		orr	\res, \res, \tmp2, lsl #8
 164		ldrb	\tmp2, [\tmp1, #2]
 165		ldrb	\tmp1, [\tmp1, #3]
 166		orr	\res, \res, \tmp2, lsl #16
 167		orr	\res, \res, \tmp1, lsl #24
 168		.endm
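		/*
		 * The four ldrb/orr steps above are a hand-rolled
		 * get_unaligned_le32(): the size word may sit at any byte
		 * offset, so it is assembled one byte at a time.  A rough C
		 * equivalent (illustrative sketch only):
		 *
		 *	size = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
		 */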
 169
 170		.macro	be32tocpu, val, tmp
 171#ifndef __ARMEB__
 172		/* convert to little endian */
 173		rev_l	\val, \tmp
 174#endif
 175		.endm
 176
 177		.section ".start", "ax"
 178/*
 179 * sort out different calling conventions
 180 */
 181		.align
 182		/*
 183		 * Always enter in ARM state for CPUs that support the ARM ISA.
 184		 * As of today (2014) that's exactly the members of the A and R
 185		 * classes.
 186		 */
 187 AR_CLASS(	.arm	)
 188start:
 189		.type	start,#function
 190		/*
 191		 * These 7 nops along with the 1 nop immediately below for
 192		 * !THUMB2 form 8 nops that make the compressed kernel bootable
  193		 * on legacy ARM systems that assumed the kernel was in a.out
  194		 * binary format. The boot loaders on these systems would
  195		 * jump 32 bytes into the image to skip the a.out header.
  196		 * With these 8 nops filling exactly 32 bytes, things still
  197		 * work as expected on these legacy systems. Thumb2 mode keeps
  198		 * 7 of the nops, as it turns out that some boot loaders
  199		 * were patching the initial instructions of the kernel, i.e.
  200		 * they had started to exploit this "patch area".
 201		 */
 202		__initial_nops
 203		.rept	5
 204		__nop
 205		.endr
 206#ifndef CONFIG_THUMB2_KERNEL
 207		__nop
 208#else
 209 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
 210  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
 211		.thumb
 212#endif
 213		W(b)	1f
 214
 215		.word	_magic_sig	@ Magic numbers to help the loader
 216		.word	_magic_start	@ absolute load/run zImage address
 217		.word	_magic_end	@ zImage end address
 218		.word	0x04030201	@ endianness flag
 219		.word	0x45454545	@ another magic number to indicate
 220		.word	_magic_table	@ additional data table
 221
 222		__EFI_HEADER
 2231:
 224 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 225 AR_CLASS(	mrs	r9, cpsr	)
 226#ifdef CONFIG_ARM_VIRT_EXT
 227		bl	__hyp_stub_install	@ get into SVC mode, reversibly
 228#endif
 229		mov	r7, r1			@ save architecture ID
 230		mov	r8, r2			@ save atags pointer
 231
 232#ifndef CONFIG_CPU_V7M
 233		/*
 234		 * Booting from Angel - need to enter SVC mode and disable
 235		 * FIQs/IRQs (numeric definitions from angel arm.h source).
 236		 * We only do this if we were in user mode on entry.
 237		 */
 238		mrs	r2, cpsr		@ get current mode
 239		tst	r2, #3			@ not user?
 240		bne	not_angel
 241		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 242 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 243 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
 244not_angel:
 245		safe_svcmode_maskall r0
 246		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 247						@ SPSR
 248#endif
 249		/*
 250		 * Note that some cache flushing and other stuff may
 251		 * be needed here - is there an Angel SWI call for this?
 252		 */
 253
 254		/*
 255		 * some architecture specific code can be inserted
 256		 * by the linker here, but it should preserve r7, r8, and r9.
 257		 */
 258
 259		.text
 260
 261#ifdef CONFIG_AUTO_ZRELADDR
 262		/*
 263		 * Find the start of physical memory.  As we are executing
 264		 * without the MMU on, we are in the physical address space.
 265		 * We just need to get rid of any offset by aligning the
 266		 * address.
 267		 *
 268		 * This alignment is a balance between the requirements of
 269		 * different platforms - we have chosen 128MB to allow
 270		 * platforms which align the start of their physical memory
 271		 * to 128MB to use this feature, while allowing the zImage
 272		 * to be placed within the first 128MB of memory on other
 273		 * platforms.  Increasing the alignment means we place
 274		 * stricter alignment requirements on the start of physical
 275		 * memory, but relaxing it means that we break people who
 276		 * are already placing their zImage in (eg) the top 64MB
 277		 * of this range.
 278		 */
 279		mov	r0, pc
 280		and	r0, r0, #0xf8000000
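		/*
		 * In C this is simply (sketch only, SZ_128M from
		 * <linux/sizes.h>):
		 *
		 *	phys_start = pc & ~(SZ_128M - 1);
		 *
		 * i.e. the current PC rounded down to a 128MB boundary is
		 * taken as the start of physical memory.
		 */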
 281#ifdef CONFIG_USE_OF
 282		adr	r1, LC1
 283#ifdef CONFIG_ARM_APPENDED_DTB
 284		/*
 285		 * Look for an appended DTB.  If found, we cannot use it to
 286		 * validate the calculated start of physical memory, as its
 287		 * memory nodes may need to be augmented by ATAGS stored at
 288		 * an offset from the same start of physical memory.
 289		 */
 290		ldr	r2, [r1, #4]	@ get &_edata
 291		add	r2, r2, r1	@ relocate it
 292		ldr	r2, [r2]	@ get DTB signature
 293		ldr	r3, =OF_DT_MAGIC
 294		cmp	r2, r3		@ do we have a DTB there?
 295		beq	1f		@ if yes, skip validation
 296#endif /* CONFIG_ARM_APPENDED_DTB */
 297
 298		/*
 299		 * Make sure we have some stack before calling C code.
 300		 * No GOT fixup has occurred yet, but none of the code we're
 301		 * about to call uses any global variables.
 302		 */
 303		ldr	sp, [r1]	@ get stack location
 304		add	sp, sp, r1	@ apply relocation
 305
 306		/* Validate calculated start against passed DTB */
 307		mov	r1, r8
 308		bl	fdt_check_mem_start
 3091:
 310#endif /* CONFIG_USE_OF */
 311		/* Determine final kernel image address. */
 312		add	r4, r0, #TEXT_OFFSET
 313#else
 314		ldr	r4, =zreladdr
 315#endif
 316
 317		/*
 318		 * Set up a page table only if it won't overwrite ourself.
 319		 * That means r4 < pc || r4 - 16k page directory > &_end.
  320		 * Given that r4 > &_end is most infrequent, we add a rough
 321		 * additional 1MB of room for a possible appended DTB.
 322		 */
 323		mov	r0, pc
 324		cmp	r0, r4
 325		ldrcc	r0, .Lheadroom
 326		addcc	r0, r0, pc
 327		cmpcc	r4, r0
 328		orrcc	r4, r4, #1		@ remember we skipped cache_on
 329		blcs	cache_on
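		/*
		 * In rough C terms, with "headroom" being the value stored
		 * at .Lheadroom below (illustrative sketch only):
		 *
		 *	if (pc >= r4 || r4 >= pc + headroom)
		 *		cache_on();
		 *	else
		 *		r4 |= 1;	   remember cache_on was skipped
		 */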
 330
 331restart:	adr	r0, LC1
 332		ldr	sp, [r0]
 333		ldr	r6, [r0, #4]
 334		add	sp, sp, r0
 335		add	r6, r6, r0
 336
 337		get_inflated_image_size	r9, r10, lr
 338
 339#ifndef CONFIG_ZBOOT_ROM
 340		/* malloc space is above the relocated stack (64k max) */
 341		add	r10, sp, #MALLOC_SIZE
 342#else
 343		/*
 344		 * With ZBOOT_ROM the bss/stack is non relocatable,
 345		 * but someone could still run this code from RAM,
 346		 * in which case our reference is _edata.
 347		 */
 348		mov	r10, r6
 349#endif
 350
 351		mov	r5, #0			@ init dtb size to 0
 352#ifdef CONFIG_ARM_APPENDED_DTB
 353/*
 354 *   r4  = final kernel address (possibly with LSB set)
 355 *   r5  = appended dtb size (still unknown)
 356 *   r6  = _edata
 357 *   r7  = architecture ID
 358 *   r8  = atags/device tree pointer
 359 *   r9  = size of decompressed image
 360 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 361 *   sp  = stack pointer
 362 *
 363 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 364 * dtb data will get relocated along with the kernel if necessary.
 365 */
 366
 367		ldr	lr, [r6, #0]
 368		ldr	r1, =OF_DT_MAGIC
 369		cmp	lr, r1
 370		bne	dtb_check_done		@ not found
 371
 372#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
 373		/*
 374		 * OK... Let's do some funky business here.
 375		 * If we do have a DTB appended to zImage, and we do have
  376		 * an ATAG list around, we want the latter to be translated
 377		 * and folded into the former here. No GOT fixup has occurred
 378		 * yet, but none of the code we're about to call uses any
 379		 * global variable.
 380		*/
 381
 382		/* Get the initial DTB size */
 383		ldr	r5, [r6, #4]
 384		be32tocpu r5, r1
 385		dbgadtb	r6, r5
 386		/* 50% DTB growth should be good enough */
 387		add	r5, r5, r5, lsr #1
 388		/* preserve 64-bit alignment */
 389		add	r5, r5, #7
 390		bic	r5, r5, #7
 391		/* clamp to 32KB min and 1MB max */
 392		cmp	r5, #(1 << 15)
 393		movlo	r5, #(1 << 15)
 394		cmp	r5, #(1 << 20)
 395		movhi	r5, #(1 << 20)
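		/*
		 * Roughly, in C (sketch only):
		 *
		 *	size = ALIGN(size + size / 2, 8);
		 *	size = clamp(size, SZ_32K, SZ_1M);
		 */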
 396		/* temporarily relocate the stack past the DTB work space */
 397		add	sp, sp, r5
 398
 399		mov	r0, r8
 400		mov	r1, r6
 401		mov	r2, r5
 402		bl	atags_to_fdt
 403
 404		/*
 405		 * If returned value is 1, there is no ATAG at the location
 406		 * pointed by r8.  Try the typical 0x100 offset from start
 407		 * of RAM and hope for the best.
 408		 */
 409		cmp	r0, #1
 410		sub	r0, r4, #TEXT_OFFSET
 411		bic	r0, r0, #1
 412		add	r0, r0, #0x100
 413		mov	r1, r6
 414		mov	r2, r5
 415		bleq	atags_to_fdt
 416
 417		sub	sp, sp, r5
 418#endif
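		/*
		 * Condensed to C (sketch only; atags_to_fdt() lives in
		 * atags_to_fdt.c in this directory):
		 *
		 *	if (atags_to_fdt(atags, fdt, size) == 1)
		 *		atags_to_fdt((void *)(phys_start + 0x100),
		 *			     fdt, size);
		 *
		 * i.e. when no ATAG list is found at the pointer handed over
		 * in r8, retry at the conventional 0x100 offset from the
		 * start of RAM and hope for the best.
		 */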
 419
 420		mov	r8, r6			@ use the appended device tree
 421
 422		/*
 423		 * Make sure that the DTB doesn't end up in the final
 424		 * kernel's .bss area. To do so, we adjust the decompressed
 425		 * kernel size to compensate if that .bss size is larger
 426		 * than the relocated code.
 427		 */
 428		ldr	r5, =_kernel_bss_size
 429		adr	r1, wont_overwrite
 430		sub	r1, r6, r1
 431		subs	r1, r5, r1
 432		addhi	r9, r9, r1
 433
 434		/* Get the current DTB size */
 435		ldr	r5, [r6, #4]
 436		be32tocpu r5, r1
 437
 438		/* preserve 64-bit alignment */
 439		add	r5, r5, #7
 440		bic	r5, r5, #7
 441
 442		/* relocate some pointers past the appended dtb */
 443		add	r6, r6, r5
 444		add	r10, r10, r5
 445		add	sp, sp, r5
 446dtb_check_done:
 447#endif
 448
 449/*
 450 * Check to see if we will overwrite ourselves.
 451 *   r4  = final kernel address (possibly with LSB set)
 452 *   r9  = size of decompressed image
 453 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 454 * We basically want:
 455 *   r4 - 16k page directory >= r10 -> OK
 456 *   r4 + image length <= address of wont_overwrite -> OK
 457 * Note: the possible LSB in r4 is harmless here.
 458 */
 459		add	r10, r10, #16384
 460		cmp	r4, r10
 461		bhs	wont_overwrite
 462		add	r10, r4, r9
 463		adr	r9, wont_overwrite
 464		cmp	r10, r9
 465		bls	wont_overwrite
 466
 467/*
 468 * Relocate ourselves past the end of the decompressed kernel.
 469 *   r6  = _edata
 470 *   r10 = end of the decompressed kernel
 471 * Because we always copy ahead, we need to do it from the end and go
 472 * backward in case the source and destination overlap.
 473 */
 474		/*
 475		 * Bump to the next 256-byte boundary with the size of
 476		 * the relocation code added. This avoids overwriting
 477		 * ourself when the offset is small.
 478		 */
 479		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 480		bic	r10, r10, #255
 481
 482		/* Get start of code we want to copy and align it down. */
 483		adr	r5, restart
 484		bic	r5, r5, #31
 485
 486/* Relocate the hyp vector base if necessary */
 487#ifdef CONFIG_ARM_VIRT_EXT
 488		mrs	r0, spsr
 489		and	r0, r0, #MODE_MASK
 490		cmp	r0, #HYP_MODE
 491		bne	1f
 492
 493		/*
 494		 * Compute the address of the hyp vectors after relocation.
 495		 * Call __hyp_set_vectors with the new address so that we
 496		 * can HVC again after the copy.
 497		 */
 498		adr_l	r0, __hyp_stub_vectors
 499		sub	r0, r0, r5
 500		add	r0, r0, r10
 501		bl	__hyp_set_vectors
 5021:
 503#endif
 504
 505		sub	r9, r6, r5		@ size to copy
 506		add	r9, r9, #31		@ rounded up to a multiple
 507		bic	r9, r9, #31		@ ... of 32 bytes
 508		add	r6, r9, r5
 509		add	r9, r9, r10
 510
 511#ifdef DEBUG
 512		sub     r10, r6, r5
 513		sub     r10, r9, r10
 514		/*
 515		 * We are about to copy the kernel to a new memory area.
 516		 * The boundaries of the new memory area can be found in
 517		 * r10 and r9, whilst r5 and r6 contain the boundaries
 518		 * of the memory we are going to copy.
 519		 * Calling dbgkc will help with the printing of this
 520		 * information.
 521		 */
 522		dbgkc	r5, r6, r10, r9
 523#endif
 524
 5251:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
 526		cmp	r6, r5
 527		stmdb	r9!, {r0 - r3, r10 - r12, lr}
 528		bhi	1b
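		/*
		 * Each ldmdb/stmdb pair moves 32 bytes (8 registers) per
		 * iteration, walking downwards from _edata.  Copying from
		 * the top like this keeps an overlapping, higher destination
		 * safe -- the same reason memmove() copies backwards when
		 * dest > src.
		 */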
 529
 530		/* Preserve offset to relocated code. */
 531		sub	r6, r9, r6
 532
 533		mov	r0, r9			@ start of relocated zImage
 534		add	r1, sp, r6		@ end of relocated zImage
 535		bl	cache_clean_flush
 536
 537		badr	r0, restart
 538		add	r0, r0, r6
 539		mov	pc, r0
 540
 541wont_overwrite:
 542		adr	r0, LC0
 543		ldmia	r0, {r1, r2, r3, r11, r12}
 544		sub	r0, r0, r1		@ calculate the delta offset
 545
 546/*
 547 * If delta is zero, we are running at the address we were linked at.
 548 *   r0  = delta
 549 *   r2  = BSS start
 550 *   r3  = BSS end
 551 *   r4  = kernel execution address (possibly with LSB set)
 552 *   r5  = appended dtb size (0 if not present)
 553 *   r7  = architecture ID
 554 *   r8  = atags pointer
 555 *   r11 = GOT start
 556 *   r12 = GOT end
 557 *   sp  = stack pointer
 558 */
 559		orrs	r1, r0, r5
 560		beq	not_relocated
 561
 562		add	r11, r11, r0
 563		add	r12, r12, r0
 564
 565#ifndef CONFIG_ZBOOT_ROM
 566		/*
  567		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
 568		 * we need to fix up pointers into the BSS region.
 569		 * Note that the stack pointer has already been fixed up.
 570		 */
 571		add	r2, r2, r0
 572		add	r3, r3, r0
 573
 574		/*
 575		 * Relocate all entries in the GOT table.
 576		 * Bump bss entries to _edata + dtb size
 577		 */
 5781:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 579		add	r1, r1, r0		@ This fixes up C references
 580		cmp	r1, r2			@ if entry >= bss_start &&
 581		cmphs	r3, r1			@       bss_end > entry
 582		addhi	r1, r1, r5		@    entry += dtb size
 583		str	r1, [r11], #4		@ next entry
 584		cmp	r11, r12
 585		blo	1b
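		/*
		 * In rough C (sketch only):
		 *
		 *	for (p = got_start; p < got_end; p++) {
		 *		*p += delta;
		 *		if (*p >= bss_start && *p < bss_end)
		 *			*p += dtb_size;
		 *	}
		 */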
 586
 587		/* bump our bss pointers too */
 588		add	r2, r2, r5
 589		add	r3, r3, r5
 590
 591#else
 592
 593		/*
 594		 * Relocate entries in the GOT table.  We only relocate
 595		 * the entries that are outside the (relocated) BSS region.
 596		 */
 5971:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 598		cmp	r1, r2			@ entry < bss_start ||
 599		cmphs	r3, r1			@ _end < entry
 600		addlo	r1, r1, r0		@ table.  This fixes up the
 601		str	r1, [r11], #4		@ C references.
 602		cmp	r11, r12
 603		blo	1b
 604#endif
 605
 606not_relocated:	mov	r0, #0
 6071:		str	r0, [r2], #4		@ clear bss
 608		str	r0, [r2], #4
 609		str	r0, [r2], #4
 610		str	r0, [r2], #4
 611		cmp	r2, r3
 612		blo	1b
 613
 614		/*
 615		 * Did we skip the cache setup earlier?
 616		 * That is indicated by the LSB in r4.
 617		 * Do it now if so.
 618		 */
 619		tst	r4, #1
 620		bic	r4, r4, #1
 621		blne	cache_on
 622
 623/*
 624 * The C runtime environment should now be setup sufficiently.
 625 * Set up some pointers, and start decompressing.
 626 *   r4  = kernel execution address
 627 *   r7  = architecture ID
 628 *   r8  = atags pointer
 629 */
 630		mov	r0, r4
 631		mov	r1, sp			@ malloc space above stack
 632		add	r2, sp, #MALLOC_SIZE	@ 64k max
 633		mov	r3, r7
 634		bl	decompress_kernel
 635
 636		get_inflated_image_size	r1, r2, r3
 637
 638		mov	r0, r4			@ start of inflated image
 639		add	r1, r1, r0		@ end of inflated image
 640		bl	cache_clean_flush
 641		bl	cache_off
 642
 643#ifdef CONFIG_ARM_VIRT_EXT
 644		mrs	r0, spsr		@ Get saved CPU boot mode
 645		and	r0, r0, #MODE_MASK
 646		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
 647		bne	__enter_kernel		@ boot kernel directly
 648
 649		adr_l	r0, __hyp_reentry_vectors
 650		bl	__hyp_set_vectors
 651		__HVC(0)			@ otherwise bounce to hyp mode
 652
 653		b	.			@ should never be reached
 654#else
 655		b	__enter_kernel
 656#endif
 657
 658		.align	2
 659		.type	LC0, #object
 660LC0:		.word	LC0			@ r1
 661		.word	__bss_start		@ r2
 662		.word	_end			@ r3
 663		.word	_got_start		@ r11
 664		.word	_got_end		@ ip
 665		.size	LC0, . - LC0
 666
 667		.type	LC1, #object
 668LC1:		.word	.L_user_stack_end - LC1	@ sp
 669		.word	_edata - LC1		@ r6
 670		.size	LC1, . - LC1
 671
 672.Lheadroom:
 673		.word	_end - restart + 16384 + 1024*1024
 674
 675.Linflated_image_size_offset:
 676		.long	(input_data_end - 4) - .
 677
 678#ifdef CONFIG_ARCH_RPC
 679		.globl	params
 680params:		ldr	r0, =0x10000100		@ params_phys for RPC
 681		mov	pc, lr
 682		.ltorg
 683		.align
 684#endif
 685
 686/*
 687 * dcache_line_size - get the minimum D-cache line size from the CTR register
 688 * on ARMv7.
 689 */
 690		.macro	dcache_line_size, reg, tmp
 691#ifdef CONFIG_CPU_V7M
 692		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 693		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 694		ldr	\tmp, [\tmp]
 695#else
 696		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
 697#endif
 698		lsr	\tmp, \tmp, #16
 699		and	\tmp, \tmp, #0xf		@ cache line size encoding
 700		mov	\reg, #4			@ bytes per word
 701		mov	\reg, \reg, lsl \tmp		@ actual cache line size
 702		.endm
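/*
 * CTR[19:16] (DminLine) is the log2 of the smallest D-cache line in
 * words, so the macro computes, in effect:
 *
 *	line_size_bytes = 4 << ((ctr >> 16) & 0xf);
 */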
 703
 704/*
 705 * Turn on the cache.  We need to setup some page tables so that we
 706 * can have both the I and D caches on.
 707 *
 708 * We place the page tables 16k down from the kernel execution address,
 709 * and we hope that nothing else is using it.  If we're using it, we
 710 * will go pop!
 711 *
 712 * On entry,
 713 *  r4 = kernel execution address
 714 *  r7 = architecture number
 715 *  r8 = atags pointer
 716 * On exit,
 717 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 718 * This routine must preserve:
 719 *  r4, r7, r8
 720 */
 721		.align	5
 722cache_on:	mov	r3, #8			@ cache_on function
 723		b	call_cache_fn
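		/*
		 * r3 selects which method slot of the matching proc_types
		 * entry below gets called: offset 8 is 'cache on', 12 is
		 * 'cache off' and 16 is 'cache flush' (each entry being
		 * PROC_ENTRY_SIZE = 20 bytes: ID match, ID mask, then the
		 * three branches).
		 */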
 724
 725/*
 726 * Initialize the highest priority protection region, PR7
  727 * to cover the whole 32-bit address range as cacheable and bufferable.
 728 */
 729__armv4_mpu_cache_on:
 730		mov	r0, #0x3f		@ 4G, the whole
 731		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 732		mcr 	p15, 0, r0, c6, c7, 1
 733
 734		mov	r0, #0x80		@ PR7
 735		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
 736		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
 737		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 738
 739		mov	r0, #0xc000
 740		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
 741		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
 742
 743		mov	r0, #0
 744		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 745		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 746		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 747		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 748						@ ...I .... ..D. WC.M
 749		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
 750		orr	r0, r0, #0x1000		@ ...1 .... .... ....
 751
 752		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 753
 754		mov	r0, #0
 755		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 756		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 757		mov	pc, lr
 758
 759__armv3_mpu_cache_on:
 760		mov	r0, #0x3f		@ 4G, the whole
 761		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 762
 763		mov	r0, #0x80		@ PR7
 764		mcr	p15, 0, r0, c2, c0, 0	@ cache on
 765		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 766
 767		mov	r0, #0xc000
 768		mcr	p15, 0, r0, c5, c0, 0	@ access permission
 769
 770		mov	r0, #0
 771		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 772		/*
 773		 * ?? ARMv3 MMU does not allow reading the control register,
 774		 * does this really work on ARMv3 MPU?
 775		 */
 776		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 777						@ .... .... .... WC.M
 778		orr	r0, r0, #0x000d		@ .... .... .... 11.1
 779		/* ?? this overwrites the value constructed above? */
 780		mov	r0, #0
 781		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 782
 783		/* ?? invalidate for the second time? */
 784		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 785		mov	pc, lr
 786
 787#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 788#define CB_BITS 0x08
 789#else
 790#define CB_BITS 0x0c
 791#endif
 792
 793__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
 794		bic	r3, r3, #0xff		@ Align the pointer
 795		bic	r3, r3, #0x3f00
 796/*
 797 * Initialise the page tables, turning on the cacheable and bufferable
 798 * bits for the RAM area only.
 799 */
 800		mov	r0, r3
 801		mov	r9, r0, lsr #18
 802		mov	r9, r9, lsl #18		@ start of RAM
 803		add	r10, r9, #0x10000000	@ a reasonable RAM size
 804		mov	r1, #0x12		@ XN|U + section mapping
 805		orr	r1, r1, #3 << 10	@ AP=11
 806		add	r2, r3, #16384
 8071:		cmp	r1, r9			@ if virt > start of RAM
 808		cmphs	r10, r1			@   && end of RAM > virt
 809		bic	r1, r1, #0x1c		@ clear XN|U + C + B
 810		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
 811		orrhs	r1, r1, r6		@ set RAM section settings
 812		str	r1, [r0], #4		@ 1:1 mapping
 813		add	r1, r1, #1048576
 814		teq	r0, r2
 815		bne	1b
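/*
 * Very roughly, in C (sketch only, bit names abbreviated):
 *
 *	for (va = 0; va < 4G; va += 1M) {
 *		u32 entry = va | SECTION | AP_FULL;
 *		if (va >= ram_start && va < ram_start + 256M)
 *			entry |= C | B;		   cacheable + bufferable
 *		else
 *			entry |= XN;		   execute-never, uncached
 *		pgtable[va >> 20] = entry;
 *	}
 *
 * where ram_start is the page table address rounded down to 256KB and
 * the window is assumed to be a "reasonable" 256MB of RAM.
 */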
 816/*
 817 * If ever we are running from Flash, then we surely want the cache
 818 * to be enabled also for our execution instance...  We map 2MB of it
  819 * so there is no map overlap problem for compressed kernels of up to 1 MB.
 820 * If the execution is in RAM then we would only be duplicating the above.
 821 */
 822		orr	r1, r6, #0x04		@ ensure B is set for this
 823		orr	r1, r1, #3 << 10
 824		mov	r2, pc
 825		mov	r2, r2, lsr #20
 826		orr	r1, r1, r2, lsl #20
 827		add	r0, r3, r2, lsl #2
 828		str	r1, [r0], #4
 829		add	r1, r1, #1048576
 830		str	r1, [r0]
 831		mov	pc, lr
 832ENDPROC(__setup_mmu)
 833
 834@ Enable unaligned access on v6, to allow better code generation
 835@ for the decompressor C code:
 836__armv6_mmu_cache_on:
 837		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
 838		bic	r0, r0, #2		@ A (no unaligned access fault)
 839		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 840		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
 841		b	__armv4_mmu_cache_on
 842
 843__arm926ejs_mmu_cache_on:
 844#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 845		mov	r0, #4			@ put dcache in WT mode
 846		mcr	p15, 7, r0, c15, c0, 0
 847#endif
 848
 849__armv4_mmu_cache_on:
 850		mov	r12, lr
 851#ifdef CONFIG_MMU
 852		mov	r6, #CB_BITS | 0x12	@ U
 853		bl	__setup_mmu
 854		mov	r0, #0
 855		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 856		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 857		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 858		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 859		orr	r0, r0, #0x0030
 860 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 861		bl	__common_mmu_cache_on
 862		mov	r0, #0
 863		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 864#endif
 865		mov	pc, r12
 866
 867__armv7_mmu_cache_on:
 868		enable_cp15_barriers	r11
 869		mov	r12, lr
 870#ifdef CONFIG_MMU
 871		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
 872		tst	r11, #0xf		@ VMSA
 873		movne	r6, #CB_BITS | 0x02	@ !XN
 874		blne	__setup_mmu
 875		mov	r0, #0
 876		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 877		tst	r11, #0xf		@ VMSA
 878		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 879#endif
 880		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 881		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
 882		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 883		orr	r0, r0, #0x003c		@ write buffer
 884		bic	r0, r0, #2		@ A (no unaligned access fault)
 885		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 886						@ (needed for ARM1176)
 887#ifdef CONFIG_MMU
 888 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 889		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
 890		orrne	r0, r0, #1		@ MMU enabled
 891		movne	r1, #0xfffffffd		@ domain 0 = client
 892		bic     r6, r6, #1 << 31        @ 32-bit translation system
 893		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
 894		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
 895		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 896		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 897#endif
 898		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 899		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 900		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
 901		mov	r0, #0
 902		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 903		mov	pc, r12
 904
 905__fa526_cache_on:
 906		mov	r12, lr
 907		mov	r6, #CB_BITS | 0x12	@ U
 908		bl	__setup_mmu
 909		mov	r0, #0
 910		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
 911		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 912		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 913		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 914		orr	r0, r0, #0x1000		@ I-cache enable
 915		bl	__common_mmu_cache_on
 916		mov	r0, #0
 917		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 918		mov	pc, r12
 919
 920__common_mmu_cache_on:
 921#ifndef CONFIG_THUMB2_KERNEL
 922#ifndef DEBUG
 923		orr	r0, r0, #0x000d		@ Write buffer, mmu
 924#endif
 925		mov	r1, #-1
 926		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
 927		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
 928		b	1f
 929		.align	5			@ cache line aligned
 9301:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 931		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
 932		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
 933#endif
 934
 935#define PROC_ENTRY_SIZE (4*5)
 936
 937/*
 938 * Here follow the relocatable cache support functions for the
 939 * various processors.  This is a generic hook for locating an
 940 * entry and jumping to an instruction at the specified offset
 941 * from the start of the block.  Please note this is all position
 942 * independent code.
 943 *
 944 *  r1  = corrupted
 945 *  r2  = corrupted
 946 *  r3  = block offset
 947 *  r9  = corrupted
 948 *  r12 = corrupted
 949 */
 950
 951call_cache_fn:	adr	r12, proc_types
 952#ifdef CONFIG_CPU_CP15
 953		mrc	p15, 0, r9, c0, c0	@ get processor ID
 954#elif defined(CONFIG_CPU_V7M)
 955		/*
 956		 * On v7-M the processor id is located in the V7M_SCB_CPUID
 957		 * register, but as cache handling is IMPLEMENTATION DEFINED on
  958		 * v7-M (if existent at all) we just return early here.
 959		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
 960		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
 961		 * use cp15 registers that are not implemented on v7-M.
 962		 */
 963		bx	lr
 964#else
 965		ldr	r9, =CONFIG_PROCESSOR_ID
 966#endif
 9671:		ldr	r1, [r12, #0]		@ get value
 968		ldr	r2, [r12, #4]		@ get mask
 969		eor	r1, r1, r9		@ (real ^ match)
 970		tst	r1, r2			@       & mask
 971 ARM(		addeq	pc, r12, r3		) @ call cache function
 972 THUMB(		addeq	r12, r3			)
 973 THUMB(		moveq	pc, r12			) @ call cache function
 974		add	r12, r12, #PROC_ENTRY_SIZE
 975		b	1b
 976
 977/*
 978 * Table for cache operations.  This is basically:
 979 *   - CPU ID match
 980 *   - CPU ID mask
 981 *   - 'cache on' method instruction
 982 *   - 'cache off' method instruction
 983 *   - 'cache flush' method instruction
 984 *
 985 * We match an entry using: ((real_id ^ match) & mask) == 0
 986 *
 987 * Writethrough caches generally only need 'on' and 'off'
 988 * methods.  Writeback caches _must_ have the flush method
 989 * defined.
 990 */
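/*
 * Viewed from C, each entry is roughly (sketch only):
 *
 *	struct proc_type {
 *		u32	match;			   CPU ID value
 *		u32	mask;			   CPU ID mask
 *		u32	on, off, flush;		   three branch instructions
 *	};
 *
 * and call_cache_fn scans the table until
 * ((processor_id ^ match) & mask) == 0 holds, then branches to the
 * slot selected by the offset in r3.
 */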
 991		.align	2
 992		.type	proc_types,#object
 993proc_types:
 994		.word	0x41000000		@ old ARM ID
 995		.word	0xff00f000
 996		mov	pc, lr
 997 THUMB(		nop				)
 998		mov	pc, lr
 999 THUMB(		nop				)
1000		mov	pc, lr
1001 THUMB(		nop				)
1002
1003		.word	0x41007000		@ ARM7/710
1004		.word	0xfff8fe00
1005		mov	pc, lr
1006 THUMB(		nop				)
1007		mov	pc, lr
1008 THUMB(		nop				)
1009		mov	pc, lr
1010 THUMB(		nop				)
1011
1012		.word	0x41807200		@ ARM720T (writethrough)
1013		.word	0xffffff00
1014		W(b)	__armv4_mmu_cache_on
1015		W(b)	__armv4_mmu_cache_off
1016		mov	pc, lr
1017 THUMB(		nop				)
1018
1019		.word	0x41007400		@ ARM74x
1020		.word	0xff00ff00
1021		W(b)	__armv3_mpu_cache_on
1022		W(b)	__armv3_mpu_cache_off
1023		W(b)	__armv3_mpu_cache_flush
1024		
1025		.word	0x41009400		@ ARM94x
1026		.word	0xff00ff00
1027		W(b)	__armv4_mpu_cache_on
1028		W(b)	__armv4_mpu_cache_off
1029		W(b)	__armv4_mpu_cache_flush
1030
1031		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1032		.word	0xff0ffff0
1033		W(b)	__arm926ejs_mmu_cache_on
1034		W(b)	__armv4_mmu_cache_off
1035		W(b)	__armv5tej_mmu_cache_flush
1036
1037		.word	0x00007000		@ ARM7 IDs
1038		.word	0x0000f000
1039		mov	pc, lr
1040 THUMB(		nop				)
1041		mov	pc, lr
1042 THUMB(		nop				)
1043		mov	pc, lr
1044 THUMB(		nop				)
1045
1046		@ Everything from here on will be the new ID system.
1047
1048		.word	0x4401a100		@ sa110 / sa1100
1049		.word	0xffffffe0
1050		W(b)	__armv4_mmu_cache_on
1051		W(b)	__armv4_mmu_cache_off
1052		W(b)	__armv4_mmu_cache_flush
1053
1054		.word	0x6901b110		@ sa1110
1055		.word	0xfffffff0
1056		W(b)	__armv4_mmu_cache_on
1057		W(b)	__armv4_mmu_cache_off
1058		W(b)	__armv4_mmu_cache_flush
1059
1060		.word	0x56056900
1061		.word	0xffffff00		@ PXA9xx
1062		W(b)	__armv4_mmu_cache_on
1063		W(b)	__armv4_mmu_cache_off
1064		W(b)	__armv4_mmu_cache_flush
1065
1066		.word	0x56158000		@ PXA168
1067		.word	0xfffff000
1068		W(b)	__armv4_mmu_cache_on
1069		W(b)	__armv4_mmu_cache_off
1070		W(b)	__armv5tej_mmu_cache_flush
1071
1072		.word	0x56050000		@ Feroceon
1073		.word	0xff0f0000
1074		W(b)	__armv4_mmu_cache_on
1075		W(b)	__armv4_mmu_cache_off
1076		W(b)	__armv5tej_mmu_cache_flush
1077
1078#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1079		/* this conflicts with the standard ARMv5TE entry */
1080		.long	0x41009260		@ Old Feroceon
1081		.long	0xff00fff0
1082		b	__armv4_mmu_cache_on
1083		b	__armv4_mmu_cache_off
1084		b	__armv5tej_mmu_cache_flush
1085#endif
1086
1087		.word	0x66015261		@ FA526
1088		.word	0xff01fff1
1089		W(b)	__fa526_cache_on
1090		W(b)	__armv4_mmu_cache_off
1091		W(b)	__fa526_cache_flush
1092
1093		@ These match on the architecture ID
1094
1095		.word	0x00020000		@ ARMv4T
1096		.word	0x000f0000
1097		W(b)	__armv4_mmu_cache_on
1098		W(b)	__armv4_mmu_cache_off
1099		W(b)	__armv4_mmu_cache_flush
1100
1101		.word	0x00050000		@ ARMv5TE
1102		.word	0x000f0000
1103		W(b)	__armv4_mmu_cache_on
1104		W(b)	__armv4_mmu_cache_off
1105		W(b)	__armv4_mmu_cache_flush
1106
1107		.word	0x00060000		@ ARMv5TEJ
1108		.word	0x000f0000
1109		W(b)	__armv4_mmu_cache_on
1110		W(b)	__armv4_mmu_cache_off
1111		W(b)	__armv5tej_mmu_cache_flush
1112
1113		.word	0x0007b000		@ ARMv6
1114		.word	0x000ff000
1115		W(b)	__armv6_mmu_cache_on
1116		W(b)	__armv4_mmu_cache_off
1117		W(b)	__armv6_mmu_cache_flush
1118
1119		.word	0x000f0000		@ new CPU Id
1120		.word	0x000f0000
1121		W(b)	__armv7_mmu_cache_on
1122		W(b)	__armv7_mmu_cache_off
1123		W(b)	__armv7_mmu_cache_flush
1124
1125		.word	0			@ unrecognised type
1126		.word	0
1127		mov	pc, lr
1128 THUMB(		nop				)
1129		mov	pc, lr
1130 THUMB(		nop				)
1131		mov	pc, lr
1132 THUMB(		nop				)
1133
1134		.size	proc_types, . - proc_types
1135
1136		/*
1137		 * If you get a "non-constant expression in ".if" statement"
1138		 * error from the assembler on this line, check that you have
1139		 * not accidentally written a "b" instruction where you should
1140		 * have written W(b).
1141		 */
1142		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1143		.error "The size of one or more proc_types entries is wrong."
1144		.endif
1145
1146/*
1147 * Turn off the Cache and MMU.  ARMv3 does not support
1148 * reading the control register, but ARMv4 does.
1149 *
1150 * On exit,
1151 *  r0, r1, r2, r3, r9, r12 corrupted
1152 * This routine must preserve:
1153 *  r4, r7, r8
1154 */
1155		.align	5
1156cache_off:	mov	r3, #12			@ cache_off function
1157		b	call_cache_fn
1158
1159__armv4_mpu_cache_off:
1160		mrc	p15, 0, r0, c1, c0
1161		bic	r0, r0, #0x000d
1162		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1163		mov	r0, #0
1164		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1165		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1166		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1167		mov	pc, lr
1168
1169__armv3_mpu_cache_off:
1170		mrc	p15, 0, r0, c1, c0
1171		bic	r0, r0, #0x000d
1172		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1173		mov	r0, #0
1174		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1175		mov	pc, lr
1176
1177__armv4_mmu_cache_off:
1178#ifdef CONFIG_MMU
1179		mrc	p15, 0, r0, c1, c0
1180		bic	r0, r0, #0x000d
1181		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1182		mov	r0, #0
1183		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1184		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1185#endif
1186		mov	pc, lr
1187
1188__armv7_mmu_cache_off:
1189		mrc	p15, 0, r0, c1, c0
1190#ifdef CONFIG_MMU
1191		bic	r0, r0, #0x0005
1192#else
1193		bic	r0, r0, #0x0004
1194#endif
1195		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1196		mov	r0, #0
1197#ifdef CONFIG_MMU
1198		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1199#endif
1200		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1201		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1202		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1203		mov	pc, lr
1204
1205/*
1206 * Clean and flush the cache to maintain consistency.
1207 *
1208 * On entry,
1209 *  r0 = start address
1210 *  r1 = end address (exclusive)
1211 * On exit,
1212 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1213 * This routine must preserve:
1214 *  r4, r6, r7, r8
1215 */
1216		.align	5
1217cache_clean_flush:
1218		mov	r3, #16
1219		mov	r11, r1
1220		b	call_cache_fn
1221
1222__armv4_mpu_cache_flush:
1223		tst	r4, #1
1224		movne	pc, lr
1225		mov	r2, #1
1226		mov	r3, #0
1227		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1228		mov	r1, #7 << 5		@ 8 segments
12291:		orr	r3, r1, #63 << 26	@ 64 entries
12302:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1231		subs	r3, r3, #1 << 26
1232		bcs	2b			@ entries 63 to 0
1233		subs 	r1, r1, #1 << 5
1234		bcs	1b			@ segments 7 to 0
1235
1236		teq	r2, #0
1237		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1238		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1239		mov	pc, lr
1240		
1241__fa526_cache_flush:
1242		tst	r4, #1
1243		movne	pc, lr
1244		mov	r1, #0
1245		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1246		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1247		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1248		mov	pc, lr
1249
1250__armv6_mmu_cache_flush:
1251		mov	r1, #0
1252		tst	r4, #1
1253		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1254		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1255		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1256		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1257		mov	pc, lr
1258
1259__armv7_mmu_cache_flush:
1260		enable_cp15_barriers	r10
1261		tst	r4, #1
1262		bne	iflush
1263		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1264		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1265		mov	r10, #0
1266		beq	hierarchical
1267		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1268		b	iflush
1269hierarchical:
1270		dcache_line_size r1, r2		@ r1 := dcache min line size
1271		sub	r2, r1, #1		@ r2 := line size mask
1272		bic	r0, r0, r2		@ round down start to line size
1273		sub	r11, r11, #1		@ end address is exclusive
1274		bic	r11, r11, r2		@ round down end to line size
12750:		cmp	r0, r11			@ finished?
1276		bgt	iflush
1277		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1278		add	r0, r0, r1
1279		b	0b
1280iflush:
1281		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1282		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1283		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1284		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1285		mov	pc, lr
1286
1287__armv5tej_mmu_cache_flush:
1288		tst	r4, #1
1289		movne	pc, lr
12901:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1291		bne	1b
1292		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1293		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1294		mov	pc, lr
1295
1296__armv4_mmu_cache_flush:
1297		tst	r4, #1
1298		movne	pc, lr
1299		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1300		mov	r11, #32		@ default: 32 byte line size
1301		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1302		teq	r3, r9			@ cache ID register present?
1303		beq	no_cache_id
1304		mov	r1, r3, lsr #18
1305		and	r1, r1, #7
1306		mov	r2, #1024
1307		mov	r2, r2, lsl r1		@ base dcache size *2
1308		tst	r3, #1 << 14		@ test M bit
1309		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1310		mov	r3, r3, lsr #12
1311		and	r3, r3, #3
1312		mov	r11, #8
1313		mov	r11, r11, lsl r3	@ cache line size in bytes
1314no_cache_id:
1315		mov	r1, pc
1316		bic	r1, r1, #63		@ align to longest cache line
1317		add	r2, r1, r2
13181:
1319 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1320 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1321 THUMB(		add     r1, r1, r11		)
1322		teq	r1, r2
1323		bne	1b
1324
1325		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1326		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1327		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1328		mov	pc, lr
1329
1330__armv3_mmu_cache_flush:
1331__armv3_mpu_cache_flush:
1332		tst	r4, #1
1333		movne	pc, lr
1334		mov	r1, #0
1335		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1336		mov	pc, lr
1337
1338/*
1339 * Various debugging routines for printing hex characters and
1340 * memory, which again must be relocatable.
1341 */
1342#ifdef DEBUG
1343		.align	2
1344		.type	phexbuf,#object
1345phexbuf:	.space	12
1346		.size	phexbuf, . - phexbuf
1347
1348@ phex corrupts {r0, r1, r2, r3}
1349phex:		adr	r3, phexbuf
1350		mov	r2, #0
1351		strb	r2, [r3, r1]
13521:		subs	r1, r1, #1
1353		movmi	r0, r3
1354		bmi	puts
1355		and	r2, r0, #15
1356		mov	r0, r0, lsr #4
1357		cmp	r2, #10
1358		addge	r2, r2, #7
1359		add	r2, r2, #'0'
1360		strb	r2, [r3, r1]
1361		b	1b
1362
1363@ puts corrupts {r0, r1, r2, r3}
1364puts:		loadsp	r3, r2, r1
13651:		ldrb	r2, [r0], #1
1366		teq	r2, #0
1367		moveq	pc, lr
13682:		writeb	r2, r3, r1
1369		mov	r1, #0x00020000
13703:		subs	r1, r1, #1
1371		bne	3b
1372		teq	r2, #'\n'
1373		moveq	r2, #'\r'
1374		beq	2b
1375		teq	r0, #0
1376		bne	1b
1377		mov	pc, lr
1378@ putc corrupts {r0, r1, r2, r3}
1379putc:
1380		mov	r2, r0
1381		loadsp	r3, r1, r0
1382		mov	r0, #0
1383		b	2b
1384
1385@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1386memdump:	mov	r12, r0
1387		mov	r10, lr
1388		mov	r11, #0
13892:		mov	r0, r11, lsl #2
1390		add	r0, r0, r12
1391		mov	r1, #8
1392		bl	phex
1393		mov	r0, #':'
1394		bl	putc
13951:		mov	r0, #' '
1396		bl	putc
1397		ldr	r0, [r12, r11, lsl #2]
1398		mov	r1, #8
1399		bl	phex
1400		and	r0, r11, #7
1401		teq	r0, #3
1402		moveq	r0, #' '
1403		bleq	putc
1404		and	r0, r11, #7
1405		add	r11, r11, #1
1406		teq	r0, #7
1407		bne	1b
1408		mov	r0, #'\n'
1409		bl	putc
1410		cmp	r11, #64
1411		blt	2b
1412		mov	pc, r10
1413#endif
1414
1415		.ltorg
1416
1417#ifdef CONFIG_ARM_VIRT_EXT
1418.align 5
1419__hyp_reentry_vectors:
1420		W(b)	.			@ reset
1421		W(b)	.			@ undef
1422#ifdef CONFIG_EFI_STUB
1423		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1424#else
1425		W(b)	.			@ svc
1426#endif
1427		W(b)	.			@ pabort
1428		W(b)	.			@ dabort
1429		W(b)	__enter_kernel		@ hyp
1430		W(b)	.			@ irq
1431		W(b)	.			@ fiq
1432#endif /* CONFIG_ARM_VIRT_EXT */
1433
1434__enter_kernel:
1435		mov	r0, #0			@ must be 0
1436		mov	r1, r7			@ restore architecture number
1437		mov	r2, r8			@ restore atags pointer
1438 ARM(		mov	pc, r4		)	@ call kernel
1439 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1440 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
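		/*
		 * This is the standard 32-bit ARM boot interface: the kernel
		 * proper is entered with r0 = 0, r1 = machine type number
		 * and r2 = the physical address of the ATAG list or DTB.
		 */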
1441
1442reloc_code_end:
1443
1444#ifdef CONFIG_EFI_STUB
1445__enter_kernel_from_hyp:
1446		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1447		bic	r0, r0, #0x5		@ disable MMU and caches
1448		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1449		isb
1450		b	__enter_kernel
1451
1452ENTRY(efi_enter_kernel)
1453		mov	r4, r0			@ preserve image base
1454		mov	r8, r1			@ preserve DT pointer
1455
1456		adr_l	r0, call_cache_fn
1457		adr	r1, 0f			@ clean the region of code we
1458		bl	cache_clean_flush	@ may run with the MMU off
1459
1460#ifdef CONFIG_ARM_VIRT_EXT
1461		@
1462		@ The EFI spec does not support booting on ARM in HYP mode,
1463		@ since it mandates that the MMU and caches are on, with all
1464		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1465		@
1466		@ While the EDK2 reference implementation adheres to this,
1467		@ U-Boot might decide to enter the EFI stub in HYP mode
1468		@ anyway, with the MMU and caches either on or off.
1469		@
1470		mrs	r0, cpsr		@ get the current mode
1471		msr	spsr_cxsf, r0		@ record boot mode
1472		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1473		cmp	r0, #HYP_MODE
1474		bne	.Lefi_svc
1475
1476		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1477		tst	r1, #0x1		@ MMU enabled at HYP?
1478		beq	1f
1479
1480		@
1481		@ When running in HYP mode with the caches on, we're better
1482		@ off just carrying on using the cached 1:1 mapping that the
1483		@ firmware provided. Set up the HYP vectors so HVC instructions
1484		@ issued from HYP mode take us to the correct handler code. We
1485		@ will disable the MMU before jumping to the kernel proper.
1486		@
1487 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
1488 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
1489		mcr	p15, 4, r1, c1, c0, 0
1490		adr	r0, __hyp_reentry_vectors
1491		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1492		isb
1493		b	.Lefi_hyp
1494
1495		@
1496		@ When running in HYP mode with the caches off, we need to drop
1497		@ into SVC mode now, and let the decompressor set up its cached
1498		@ 1:1 mapping as usual.
1499		@
15001:		mov	r9, r4			@ preserve image base
1501		bl	__hyp_stub_install	@ install HYP stub vectors
1502		safe_svcmode_maskall	r1	@ drop to SVC mode
1503		msr	spsr_cxsf, r0		@ record boot mode
1504		orr	r4, r9, #1		@ restore image base and set LSB
1505		b	.Lefi_hyp
1506.Lefi_svc:
1507#endif
1508		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1509		tst	r0, #0x1		@ MMU enabled?
1510		orreq	r4, r4, #1		@ set LSB if not
1511
1512.Lefi_hyp:
1513		mov	r0, r8			@ DT start
1514		add	r1, r8, r2		@ DT end
1515		bl	cache_clean_flush
1516
1517		adr	r0, 0f			@ switch to our stack
1518		ldr	sp, [r0]
1519		add	sp, sp, r0
1520
1521		mov	r5, #0			@ appended DTB size
1522		mov	r7, #0xFFFFFFFF		@ machine ID
1523		b	wont_overwrite
1524ENDPROC(efi_enter_kernel)
15250:		.long	.L_user_stack_end - .
1526#endif
1527
1528		.align
1529		.section ".stack", "aw", %nobits
1530.L_user_stack:	.space	4096
1531.L_user_stack_end:
v5.9
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 *  linux/arch/arm/boot/compressed/head.S
   4 *
   5 *  Copyright (C) 1996-2002 Russell King
   6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
   7 */
   8#include <linux/linkage.h>
   9#include <asm/assembler.h>
  10#include <asm/v7m.h>
  11
  12#include "efi-header.S"
  13
  14 AR_CLASS(	.arch	armv7-a	)
  15 M_CLASS(	.arch	armv7-m	)
  16
  17/*
  18 * Debugging stuff
  19 *
  20 * Note that these macros must not contain any code which is not
  21 * 100% relocatable.  Any attempt to do so will result in a crash.
  22 * Please select one of the following when turning on debugging.
  23 */
  24#ifdef DEBUG
  25
  26#if defined(CONFIG_DEBUG_ICEDCC)
  27
  28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
  29		.macro	loadsp, rb, tmp1, tmp2
  30		.endm
  31		.macro	writeb, ch, rb
  32		mcr	p14, 0, \ch, c0, c5, 0
  33		.endm
  34#elif defined(CONFIG_CPU_XSCALE)
  35		.macro	loadsp, rb, tmp1, tmp2
  36		.endm
  37		.macro	writeb, ch, rb
  38		mcr	p14, 0, \ch, c8, c0, 0
  39		.endm
  40#else
  41		.macro	loadsp, rb, tmp1, tmp2
  42		.endm
  43		.macro	writeb, ch, rb
  44		mcr	p14, 0, \ch, c1, c0, 0
  45		.endm
  46#endif
  47
  48#else
  49
  50#include CONFIG_DEBUG_LL_INCLUDE
  51
  52		.macro	writeb,	ch, rb
  53		senduart \ch, \rb
  54		.endm
  55
  56#if defined(CONFIG_ARCH_SA1100)
  57		.macro	loadsp, rb, tmp1, tmp2
  58		mov	\rb, #0x80000000	@ physical base address
  59#ifdef CONFIG_DEBUG_LL_SER3
  60		add	\rb, \rb, #0x00050000	@ Ser3
  61#else
  62		add	\rb, \rb, #0x00010000	@ Ser1
  63#endif
  64		.endm
  65#else
  66		.macro	loadsp,	rb, tmp1, tmp2
  67		addruart \rb, \tmp1, \tmp2
  68		.endm
  69#endif
  70#endif
  71#endif
  72
  73		.macro	kputc,val
  74		mov	r0, \val
  75		bl	putc
  76		.endm
  77
  78		.macro	kphex,val,len
  79		mov	r0, \val
  80		mov	r1, #\len
  81		bl	phex
  82		.endm
  83
  84		.macro	debug_reloc_start
  85#ifdef DEBUG
  86		kputc	#'\n'
  87		kphex	r6, 8		/* processor id */
  88		kputc	#':'
  89		kphex	r7, 8		/* architecture id */
  90#ifdef CONFIG_CPU_CP15
  91		kputc	#':'
  92		mrc	p15, 0, r0, c1, c0
  93		kphex	r0, 8		/* control reg */
  94#endif
  95		kputc	#'\n'
  96		kphex	r5, 8		/* decompressed kernel start */
  97		kputc	#'-'
  98		kphex	r9, 8		/* decompressed kernel end  */
  99		kputc	#'>'
 100		kphex	r4, 8		/* kernel execution address */
 101		kputc	#'\n'
 102#endif
 103		.endm
 104
 105		.macro	debug_reloc_end
 106#ifdef DEBUG
 107		kphex	r5, 8		/* end of kernel */
 108		kputc	#'\n'
 109		mov	r0, r4
 110		bl	memdump		/* dump 256 bytes at start of kernel */
 111#endif
 112		.endm
 113
 114		/*
 115		 * Debug kernel copy by printing the memory addresses involved
 116		 */
 117		.macro dbgkc, begin, end, cbegin, cend
 118#ifdef DEBUG
 119		kputc   #'\n'
 120		kputc   #'C'
 121		kputc   #':'
 122		kputc   #'0'
 123		kputc   #'x'
 124		kphex   \begin, 8	/* Start of compressed kernel */
 125		kputc	#'-'
 126		kputc	#'0'
 127		kputc	#'x'
 128		kphex	\end, 8		/* End of compressed kernel */
 129		kputc	#'-'
 130		kputc	#'>'
 131		kputc   #'0'
 132		kputc   #'x'
 133		kphex   \cbegin, 8	/* Start of kernel copy */
 134		kputc	#'-'
 135		kputc	#'0'
 136		kputc	#'x'
 137		kphex	\cend, 8	/* End of kernel copy */
 138		kputc	#'\n'
 139		kputc	#'\r'
 140#endif
 141		.endm
 142
 143		.macro	enable_cp15_barriers, reg
 144		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
 145		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
 146		bne	.L_\@
 147		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
 148		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 149 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
 150 THUMB(		isb						)
 151.L_\@:
 152		.endm
 153
 154		/*
 155		 * The kernel build system appends the size of the
 156		 * decompressed kernel at the end of the compressed data
 157		 * in little-endian form.
 158		 */
 159		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
 160		adr	\res, .Linflated_image_size_offset
 161		ldr	\tmp1, [\res]
 162		add	\tmp1, \tmp1, \res	@ address of inflated image size
 163
 164		ldrb	\res, [\tmp1]		@ get_unaligned_le32
 165		ldrb	\tmp2, [\tmp1, #1]
 166		orr	\res, \res, \tmp2, lsl #8
 167		ldrb	\tmp2, [\tmp1, #2]
 168		ldrb	\tmp1, [\tmp1, #3]
 169		orr	\res, \res, \tmp2, lsl #16
 170		orr	\res, \res, \tmp1, lsl #24
 171		.endm
 172
 173		.section ".start", "ax"
 174/*
 175 * sort out different calling conventions
 176 */
 177		.align
 178		/*
 179		 * Always enter in ARM state for CPUs that support the ARM ISA.
 180		 * As of today (2014) that's exactly the members of the A and R
 181		 * classes.
 182		 */
 183 AR_CLASS(	.arm	)
 184start:
 185		.type	start,#function
 186		/*
 187		 * These 7 nops along with the 1 nop immediately below for
 188		 * !THUMB2 form 8 nops that make the compressed kernel bootable
  189		 * on legacy ARM systems that assumed the kernel was in a.out
  190		 * binary format. The boot loaders on these systems would
  191		 * jump 32 bytes into the image to skip the a.out header.
  192		 * With these 8 nops filling exactly 32 bytes, things still
  193		 * work as expected on these legacy systems. Thumb2 mode keeps
  194		 * 7 of the nops, as it turns out that some boot loaders
  195		 * were patching the initial instructions of the kernel, i.e.
  196		 * they had started to exploit this "patch area".
 197		 */
 198		.rept	7
 199		__nop
 200		.endr
 201#ifndef CONFIG_THUMB2_KERNEL
 202		__nop
 203#else
 204 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
 205  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
 206		.thumb
 207#endif
 208		W(b)	1f
 209
 210		.word	_magic_sig	@ Magic numbers to help the loader
 211		.word	_magic_start	@ absolute load/run zImage address
 212		.word	_magic_end	@ zImage end address
 213		.word	0x04030201	@ endianness flag
 214		.word	0x45454545	@ another magic number to indicate
 215		.word	_magic_table	@ additional data table
 216
 217		__EFI_HEADER
 2181:
 219 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 220 AR_CLASS(	mrs	r9, cpsr	)
 221#ifdef CONFIG_ARM_VIRT_EXT
 222		bl	__hyp_stub_install	@ get into SVC mode, reversibly
 223#endif
 224		mov	r7, r1			@ save architecture ID
 225		mov	r8, r2			@ save atags pointer
 226
 227#ifndef CONFIG_CPU_V7M
 228		/*
 229		 * Booting from Angel - need to enter SVC mode and disable
 230		 * FIQs/IRQs (numeric definitions from angel arm.h source).
 231		 * We only do this if we were in user mode on entry.
 232		 */
 233		mrs	r2, cpsr		@ get current mode
 234		tst	r2, #3			@ not user?
 235		bne	not_angel
 236		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 237 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 238 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
 239not_angel:
 240		safe_svcmode_maskall r0
 241		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 242						@ SPSR
 243#endif
 244		/*
 245		 * Note that some cache flushing and other stuff may
 246		 * be needed here - is there an Angel SWI call for this?
 247		 */
 248
 249		/*
 250		 * some architecture specific code can be inserted
 251		 * by the linker here, but it should preserve r7, r8, and r9.
 252		 */
 253
 254		.text
 255
 256#ifdef CONFIG_AUTO_ZRELADDR
 257		/*
 258		 * Find the start of physical memory.  As we are executing
 259		 * without the MMU on, we are in the physical address space.
 260		 * We just need to get rid of any offset by aligning the
 261		 * address.
 262		 *
 263		 * This alignment is a balance between the requirements of
 264		 * different platforms - we have chosen 128MB to allow
 265		 * platforms which align the start of their physical memory
 266		 * to 128MB to use this feature, while allowing the zImage
 267		 * to be placed within the first 128MB of memory on other
 268		 * platforms.  Increasing the alignment means we place
 269		 * stricter alignment requirements on the start of physical
 270		 * memory, but relaxing it means that we break people who
 271		 * are already placing their zImage in (eg) the top 64MB
 272		 * of this range.
 273		 */
 274		mov	r4, pc
 275		and	r4, r4, #0xf8000000
 276		/* Determine final kernel image address. */
 277		add	r4, r4, #TEXT_OFFSET
 278#else
 279		ldr	r4, =zreladdr
 280#endif
 281
 282		/*
 283		 * Set up a page table only if it won't overwrite ourself.
 284		 * That means r4 < pc || r4 - 16k page directory > &_end.
  285		 * Given that r4 > &_end is most infrequent, we add a rough
 286		 * additional 1MB of room for a possible appended DTB.
 287		 */
 288		mov	r0, pc
 289		cmp	r0, r4
 290		ldrcc	r0, .Lheadroom
 291		addcc	r0, r0, pc
 292		cmpcc	r4, r0
 293		orrcc	r4, r4, #1		@ remember we skipped cache_on
 294		blcs	cache_on
 295
 296restart:	adr	r0, LC1
 297		ldr	sp, [r0]
 298		ldr	r6, [r0, #4]
 299		add	sp, sp, r0
 300		add	r6, r6, r0
 301
 302		get_inflated_image_size	r9, r10, lr
 303
 304#ifndef CONFIG_ZBOOT_ROM
 305		/* malloc space is above the relocated stack (64k max) */
 306		add	r10, sp, #0x10000
 307#else
 308		/*
 309		 * With ZBOOT_ROM the bss/stack is non relocatable,
 310		 * but someone could still run this code from RAM,
 311		 * in which case our reference is _edata.
 312		 */
 313		mov	r10, r6
 314#endif
 315
 316		mov	r5, #0			@ init dtb size to 0
 317#ifdef CONFIG_ARM_APPENDED_DTB
 318/*
 319 *   r4  = final kernel address (possibly with LSB set)
 320 *   r5  = appended dtb size (still unknown)
 321 *   r6  = _edata
 322 *   r7  = architecture ID
 323 *   r8  = atags/device tree pointer
 324 *   r9  = size of decompressed image
 325 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 326 *   sp  = stack pointer
 327 *
 328 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 329 * dtb data will get relocated along with the kernel if necessary.
 330 */
 331
 332		ldr	lr, [r6, #0]
 333#ifndef __ARMEB__
 334		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
 335#else
 336		ldr	r1, =0xd00dfeed
 337#endif
 338		cmp	lr, r1
 339		bne	dtb_check_done		@ not found
 340
 341#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
 342		/*
 343		 * OK... Let's do some funky business here.
 344		 * If we do have a DTB appended to zImage, and we do have
  345		 * an ATAG list around, we want the latter to be translated
 346		 * and folded into the former here. No GOT fixup has occurred
 347		 * yet, but none of the code we're about to call uses any
 348		 * global variable.
 349		*/
 350
 351		/* Get the initial DTB size */
 352		ldr	r5, [r6, #4]
 353#ifndef __ARMEB__
 354		/* convert to little endian */
 355		eor	r1, r5, r5, ror #16
 356		bic	r1, r1, #0x00ff0000
 357		mov	r5, r5, ror #8
 358		eor	r5, r5, r1, lsr #8
 359#endif
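		/*
		 * The eor/bic/ror/eor sequence above is the classic
		 * four-instruction byte swap for pre-ARMv6 cores (ARMv6+
		 * could simply use "rev"); in C it is just a 32-bit
		 * byte-order conversion of the big-endian DTB size field,
		 * e.g. __builtin_bswap32(size) on a little-endian build.
		 */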
 360		/* 50% DTB growth should be good enough */
 361		add	r5, r5, r5, lsr #1
 362		/* preserve 64-bit alignment */
 363		add	r5, r5, #7
 364		bic	r5, r5, #7
 365		/* clamp to 32KB min and 1MB max */
 366		cmp	r5, #(1 << 15)
 367		movlo	r5, #(1 << 15)
 368		cmp	r5, #(1 << 20)
 369		movhi	r5, #(1 << 20)
 370		/* temporarily relocate the stack past the DTB work space */
 371		add	sp, sp, r5
 372
 373		mov	r0, r8
 374		mov	r1, r6
 375		mov	r2, r5
 376		bl	atags_to_fdt
 377
 378		/*
 379		 * If returned value is 1, there is no ATAG at the location
 380		 * pointed by r8.  Try the typical 0x100 offset from start
 381		 * of RAM and hope for the best.
 382		 */
 383		cmp	r0, #1
 384		sub	r0, r4, #TEXT_OFFSET
 385		bic	r0, r0, #1
 386		add	r0, r0, #0x100
 387		mov	r1, r6
 388		mov	r2, r5
 389		bleq	atags_to_fdt
 390
 391		sub	sp, sp, r5
 392#endif
 393
 394		mov	r8, r6			@ use the appended device tree
 395
 396		/*
 397		 * Make sure that the DTB doesn't end up in the final
 398		 * kernel's .bss area. To do so, we adjust the decompressed
 399		 * kernel size to compensate if that .bss size is larger
 400		 * than the relocated code.
 401		 */
 402		ldr	r5, =_kernel_bss_size
 403		adr	r1, wont_overwrite
 404		sub	r1, r6, r1
 405		subs	r1, r5, r1
 406		addhi	r9, r9, r1
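		/*
		 * What the four instructions above compute, as a C sketch
		 * (illustrative only, names are ours):
		 *
		 *	// reloc_size = _edata - wont_overwrite (code+data we
		 *	// will relocate); if the decompressed kernel's .bss is
		 *	// larger than that, grow the size used for the overlap
		 *	// test so the appended DTB stays clear of .bss.
		 *	unsigned long adjust_for_bss(unsigned long image_size,
		 *				     unsigned long bss_size,
		 *				     unsigned long reloc_size)
		 *	{
		 *		if (bss_size > reloc_size)
		 *			image_size += bss_size - reloc_size;
		 *		return image_size;
		 *	}
		 */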
 407
 408		/* Get the current DTB size */
 409		ldr	r5, [r6, #4]
 410#ifndef __ARMEB__
 411		/* convert r5 (dtb size) to little endian */
 412		eor	r1, r5, r5, ror #16
 413		bic	r1, r1, #0x00ff0000
 414		mov	r5, r5, ror #8
 415		eor	r5, r5, r1, lsr #8
 416#endif
 417
 418		/* preserve 64-bit alignment */
 419		add	r5, r5, #7
 420		bic	r5, r5, #7
 421
 422		/* relocate some pointers past the appended dtb */
 423		add	r6, r6, r5
 424		add	r10, r10, r5
 425		add	sp, sp, r5
 426dtb_check_done:
 427#endif
 428
 429/*
 430 * Check to see if we will overwrite ourselves.
 431 *   r4  = final kernel address (possibly with LSB set)
 432 *   r9  = size of decompressed image
 433 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
 434 * We basically want:
 435 *   r4 - 16k page directory >= r10 -> OK
 436 *   r4 + image length <= address of wont_overwrite -> OK
 437 * Note: the possible LSB in r4 is harmless here.
 438 */
 439		add	r10, r10, #16384
 440		cmp	r4, r10
 441		bhs	wont_overwrite
 442		add	r10, r4, r9
 443		adr	r9, wont_overwrite
 444		cmp	r10, r9
 445		bls	wont_overwrite
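		/*
		 * The two tests above, as a C sketch (illustrative only, the
		 * function name is ours); "here" is the address of the
		 * wont_overwrite label:
		 *
		 *	int must_relocate(unsigned long dst,       // r4
		 *			  unsigned long img_end,   // r10
		 *			  unsigned long img_size,  // r9
		 *			  unsigned long here)
		 *	{
		 *		if (dst >= img_end + 16384)
		 *			return 0;	// page dir + kernel land above us
		 *		if (dst + img_size <= here)
		 *			return 0;	// decompressed kernel ends below us
		 *		return 1;		// would overwrite: relocate first
		 *	}
		 */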
 446
 447/*
 448 * Relocate ourselves past the end of the decompressed kernel.
 449 *   r6  = _edata
 450 *   r10 = end of the decompressed kernel
 451 * Because we always copy to a higher address, we need to do it from the
 452 * end and go backward in case the source and destination overlap.
 453 */
 454		/*
 455		 * Bump to the next 256-byte boundary with the size of
 456		 * the relocation code added. This avoids overwriting
 457		 * ourselves when the offset is small.
 458		 */
 459		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 460		bic	r10, r10, #255
 461
 462		/* Get start of code we want to copy and align it down. */
 463		adr	r5, restart
 464		bic	r5, r5, #31
 465
 466/* Relocate the hyp vector base if necessary */
 467#ifdef CONFIG_ARM_VIRT_EXT
 468		mrs	r0, spsr
 469		and	r0, r0, #MODE_MASK
 470		cmp	r0, #HYP_MODE
 471		bne	1f
 472
 473		/*
 474		 * Compute the address of the hyp vectors after relocation.
 475		 * This requires some arithmetic since we cannot directly
 476		 * reference __hyp_stub_vectors in a PC-relative way.
 477		 * Call __hyp_set_vectors with the new address so that we
 478		 * can HVC again after the copy.
 479		 */
 4800:		adr	r0, 0b
 481		movw	r1, #:lower16:__hyp_stub_vectors - 0b
 482		movt	r1, #:upper16:__hyp_stub_vectors - 0b
 483		add	r0, r0, r1
 484		sub	r0, r0, r5
 485		add	r0, r0, r10
 486		bl	__hyp_set_vectors
 4871:
 488#endif
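		/*
		 * In C terms (illustrative, names are ours), the arithmetic
		 * above computes where the stub vectors will land once the
		 * copy below has been done:
		 *
		 *	new_vectors = cur_vectors - copy_src + copy_dst;
		 *
		 * with copy_src in r5 and copy_dst in r10.
		 */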
 489
 490		sub	r9, r6, r5		@ size to copy
 491		add	r9, r9, #31		@ rounded up to a multiple
 492		bic	r9, r9, #31		@ ... of 32 bytes
 493		add	r6, r9, r5
 494		add	r9, r9, r10
 495
 496#ifdef DEBUG
 497		sub     r10, r6, r5
 498		sub     r10, r9, r10
 499		/*
 500		 * We are about to copy the kernel to a new memory area.
 501		 * The boundaries of the new memory area can be found in
 502		 * r10 and r9, whilst r5 and r6 contain the boundaries
 503		 * of the memory we are going to copy.
 504		 * Calling dbgkc will help with the printing of this
 505		 * information.
 506		 */
 507		dbgkc	r5, r6, r10, r9
 508#endif
 509
 5101:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
 511		cmp	r6, r5
 512		stmdb	r9!, {r0 - r3, r10 - r12, lr}
 513		bhi	1b
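		/*
		 * A minimal C sketch of the copy loop above (illustrative
		 * only).  The asm moves eight words (32 bytes) per iteration;
		 * the point is that copying downwards is what makes the
		 * overlapping, higher destination safe:
		 *
		 *	void copy_down(unsigned long *src_start,   // r5
		 *		       unsigned long *src_end,     // r6
		 *		       unsigned long *dst_end)     // r9
		 *	{
		 *		while (src_end > src_start)
		 *			*--dst_end = *--src_end;
		 *	}
		 */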
 514
 515		/* Preserve offset to relocated code. */
 516		sub	r6, r9, r6
 517
 518		mov	r0, r9			@ start of relocated zImage
 519		add	r1, sp, r6		@ end of relocated zImage
 520		bl	cache_clean_flush
 521
 522		badr	r0, restart
 523		add	r0, r0, r6
 524		mov	pc, r0
 525
 526wont_overwrite:
 527		adr	r0, LC0
 528		ldmia	r0, {r1, r2, r3, r11, r12}
 529		sub	r0, r0, r1		@ calculate the delta offset
 530
 531/*
 532 * If delta is zero, we are running at the address we were linked at.
 533 *   r0  = delta
 534 *   r2  = BSS start
 535 *   r3  = BSS end
 536 *   r4  = kernel execution address (possibly with LSB set)
 537 *   r5  = appended dtb size (0 if not present)
 538 *   r7  = architecture ID
 539 *   r8  = atags pointer
 540 *   r11 = GOT start
 541 *   r12 = GOT end
 542 *   sp  = stack pointer
 543 */
 544		orrs	r1, r0, r5
 545		beq	not_relocated
 546
 547		add	r11, r11, r0
 548		add	r12, r12, r0
 549
 550#ifndef CONFIG_ZBOOT_ROM
 551		/*
 552		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
 553		 * we need to fix up pointers into the BSS region.
 554		 * Note that the stack pointer has already been fixed up.
 555		 */
 556		add	r2, r2, r0
 557		add	r3, r3, r0
 558
 559		/*
 560		 * Relocate all entries in the GOT table.
 561		 * Bump bss entries to _edata + dtb size
 562		 */
 5631:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 564		add	r1, r1, r0		@ This fixes up C references
 565		cmp	r1, r2			@ if entry >= bss_start &&
 566		cmphs	r3, r1			@       bss_end > entry
 567		addhi	r1, r1, r5		@    entry += dtb size
 568		str	r1, [r11], #4		@ next entry
 569		cmp	r11, r12
 570		blo	1b
 571
 572		/* bump our bss pointers too */
 573		add	r2, r2, r5
 574		add	r3, r3, r5
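		/*
		 * C sketch of the GOT fixup above (illustrative only, names
		 * are ours).  Every entry is shifted by the load delta, and
		 * entries that point into .bss are additionally bumped past
		 * the appended DTB:
		 *
		 *	void fixup_got(unsigned long *got, unsigned long *got_end,
		 *		       unsigned long delta, unsigned long dtb_size,
		 *		       unsigned long bss_start, unsigned long bss_end)
		 *	{
		 *		for (; got < got_end; got++) {
		 *			unsigned long e = *got + delta;
		 *
		 *			if (e >= bss_start && e < bss_end)
		 *				e += dtb_size;
		 *			*got = e;
		 *		}
		 *	}
		 */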
 575
 576#else
 577
 578		/*
 579		 * Relocate entries in the GOT table.  We only relocate
 580		 * the entries that are outside the (relocated) BSS region.
 581		 */
 5821:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 583		cmp	r1, r2			@ entry < bss_start ||
 584		cmphs	r3, r1			@ _end < entry
 585		addlo	r1, r1, r0		@ table.  This fixes up the
 586		str	r1, [r11], #4		@ C references.
 587		cmp	r11, r12
 588		blo	1b
 589#endif
 590
 591not_relocated:	mov	r0, #0
 5921:		str	r0, [r2], #4		@ clear bss
 593		str	r0, [r2], #4
 594		str	r0, [r2], #4
 595		str	r0, [r2], #4
 596		cmp	r2, r3
 597		blo	1b
 598
 599		/*
 600		 * Did we skip the cache setup earlier?
 601		 * That is indicated by the LSB in r4.
 602		 * Do it now if so.
 603		 */
 604		tst	r4, #1
 605		bic	r4, r4, #1
 606		blne	cache_on
 607
 608/*
 609 * The C runtime environment should now be set up sufficiently.
 610 * Set up some pointers, and start decompressing.
 611 *   r4  = kernel execution address
 612 *   r7  = architecture ID
 613 *   r8  = atags pointer
 614 */
 615		mov	r0, r4
 616		mov	r1, sp			@ malloc space above stack
 617		add	r2, sp, #0x10000	@ 64k max
 618		mov	r3, r7
 619		bl	decompress_kernel
 620
 621		get_inflated_image_size	r1, r2, r3
 622
 623		mov	r0, r4			@ start of inflated image
 624		add	r1, r1, r0		@ end of inflated image
 625		bl	cache_clean_flush
 626		bl	cache_off
 627
 628#ifdef CONFIG_ARM_VIRT_EXT
 629		mrs	r0, spsr		@ Get saved CPU boot mode
 630		and	r0, r0, #MODE_MASK
 631		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
 632		bne	__enter_kernel		@ boot kernel directly
 633
 634		adr	r12, .L__hyp_reentry_vectors_offset
 635		ldr	r0, [r12]
 636		add	r0, r0, r12
 637
 638		bl	__hyp_set_vectors
 639		__HVC(0)			@ otherwise bounce to hyp mode
 640
 641		b	.			@ should never be reached
 642
 643		.align	2
 644.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
 645#else
 646		b	__enter_kernel
 647#endif
 648
 649		.align	2
 650		.type	LC0, #object
 651LC0:		.word	LC0			@ r1
 652		.word	__bss_start		@ r2
 653		.word	_end			@ r3
 654		.word	_got_start		@ r11
 655		.word	_got_end		@ ip
 656		.size	LC0, . - LC0
 657
 658		.type	LC1, #object
 659LC1:		.word	.L_user_stack_end - LC1	@ sp
 660		.word	_edata - LC1		@ r6
 661		.size	LC1, . - LC1
 662
 663.Lheadroom:
 664		.word	_end - restart + 16384 + 1024*1024
 665
 666.Linflated_image_size_offset:
 667		.long	(input_data_end - 4) - .
 668
 669#ifdef CONFIG_ARCH_RPC
 670		.globl	params
 671params:		ldr	r0, =0x10000100		@ params_phys for RPC
 672		mov	pc, lr
 673		.ltorg
 674		.align
 675#endif
 676
 677/*
 678 * dcache_line_size - get the minimum D-cache line size from the CTR register
 679 * on ARMv7.
 680 */
 681		.macro	dcache_line_size, reg, tmp
 682#ifdef CONFIG_CPU_V7M
 683		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 684		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 685		ldr	\tmp, [\tmp]
 686#else
 687		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
 688#endif
 689		lsr	\tmp, \tmp, #16
 690		and	\tmp, \tmp, #0xf		@ cache line size encoding
 691		mov	\reg, #4			@ bytes per word
 692		mov	\reg, \reg, lsl \tmp		@ actual cache line size
 693		.endm
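		/*
		 * In C (illustrative only): DminLine lives in CTR[19:16] and
		 * encodes log2 of the line length in words, so
		 *
		 *	unsigned int dcache_line_size(unsigned int ctr)
		 *	{
		 *		return 4U << ((ctr >> 16) & 0xf);	// bytes
		 *	}
		 */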
 694
 695/*
 696 * Turn on the cache.  We need to setup some page tables so that we
 697 * can have both the I and D caches on.
 698 *
 699 * We place the page tables 16k down from the kernel execution address,
 700 * and we hope that nothing else is using that memory.  If it is in
 701 * use, we will go pop!
 702 *
 703 * On entry,
 704 *  r4 = kernel execution address
 705 *  r7 = architecture number
 706 *  r8 = atags pointer
 707 * On exit,
 708 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 709 * This routine must preserve:
 710 *  r4, r7, r8
 711 */
 712		.align	5
 713cache_on:	mov	r3, #8			@ cache_on function
 714		b	call_cache_fn
 715
 716/*
 717 * Initialize the highest priority protection region, PR7,
 718 * to cover the whole 32-bit address space, cacheable and bufferable.
 719 */
 720__armv4_mpu_cache_on:
 721		mov	r0, #0x3f		@ 4G, the whole
 722		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 723		mcr 	p15, 0, r0, c6, c7, 1
 724
 725		mov	r0, #0x80		@ PR7
 726		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
 727		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
 728		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 729
 730		mov	r0, #0xc000
 731		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
 732		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
 733
 734		mov	r0, #0
 735		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 736		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 737		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 738		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 739						@ ...I .... ..D. WC.M
 740		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
 741		orr	r0, r0, #0x1000		@ ...1 .... .... ....
 742
 743		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 744
 745		mov	r0, #0
 746		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 747		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 748		mov	pc, lr
 749
 750__armv3_mpu_cache_on:
 751		mov	r0, #0x3f		@ 4G, the whole
 752		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 753
 754		mov	r0, #0x80		@ PR7
 755		mcr	p15, 0, r0, c2, c0, 0	@ cache on
 756		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 757
 758		mov	r0, #0xc000
 759		mcr	p15, 0, r0, c5, c0, 0	@ access permission
 760
 761		mov	r0, #0
 762		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 763		/*
 764		 * ?? ARMv3 MMU does not allow reading the control register,
 765		 * does this really work on ARMv3 MPU?
 766		 */
 767		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 768						@ .... .... .... WC.M
 769		orr	r0, r0, #0x000d		@ .... .... .... 11.1
 770		/* ?? this overwrites the value constructed above? */
 771		mov	r0, #0
 772		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 773
 774		/* ?? invalidate for the second time? */
 775		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 776		mov	pc, lr
 777
 778#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 779#define CB_BITS 0x08
 780#else
 781#define CB_BITS 0x0c
 782#endif
 783
 784__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
 785		bic	r3, r3, #0xff		@ Align the pointer
 786		bic	r3, r3, #0x3f00
 787/*
 788 * Initialise the page tables, turning on the cacheable and bufferable
 789 * bits for the RAM area only.
 790 */
 791		mov	r0, r3
 792		mov	r9, r0, lsr #18
 793		mov	r9, r9, lsl #18		@ start of RAM
 794		add	r10, r9, #0x10000000	@ a reasonable RAM size
 795		mov	r1, #0x12		@ XN|U + section mapping
 796		orr	r1, r1, #3 << 10	@ AP=11
 797		add	r2, r3, #16384
 7981:		cmp	r1, r9			@ if virt > start of RAM
 799		cmphs	r10, r1			@   && end of RAM > virt
 800		bic	r1, r1, #0x1c		@ clear XN|U + C + B
 801		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
 802		orrhs	r1, r1, r6		@ set RAM section settings
 803		str	r1, [r0], #4		@ 1:1 mapping
 804		add	r1, r1, #1048576
 805		teq	r0, r2
 806		bne	1b
 807/*
 808 * If ever we are running from Flash, then we surely want the cache
 809 * to be enabled also for our execution instance...  We map 2MB of it
 810 * so there is no map overlap problem for a compressed kernel of up to 1 MB.
 811 * If the execution is in RAM then we would only be duplicating the above.
 812 */
 813		orr	r1, r6, #0x04		@ ensure B is set for this
 814		orr	r1, r1, #3 << 10
 815		mov	r2, pc
 816		mov	r2, r2, lsr #20
 817		orr	r1, r1, r2, lsl #20
 818		add	r0, r3, r2, lsl #2
 819		str	r1, [r0], #4
 820		add	r1, r1, #1048576
 821		str	r1, [r0]
 822		mov	pc, lr
 823ENDPROC(__setup_mmu)
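		/*
		 * A rough C model of the mapping built above (illustrative
		 * only, names are ours).  Each of the 4096 level 1 entries
		 * is a 1 MiB section descriptor mapping VA == PA; only the
		 * presumed RAM window (256 MiB from the rounded-down page
		 * directory address) gets the cacheable/bufferable bits in
		 * ram_flags, everything else is mapped uncacheable:
		 *
		 *	void setup_flat_map(unsigned int *pgdir,	// 16 KiB
		 *			    unsigned int ram_start,
		 *			    unsigned int ram_end,
		 *			    unsigned int ram_flags)
		 *	{
		 *		unsigned int i, addr = 0;
		 *
		 *		for (i = 0; i < 4096; i++, addr += 0x100000) {
		 *			unsigned int desc = addr | 0x12 | (3 << 10);
		 *
		 *			if (addr >= ram_start && addr < ram_end)
		 *				desc |= ram_flags;	// C + B
		 *			else
		 *				desc |= 0x10;		// XN for non-RAM
		 *			pgdir[i] = desc;
		 *		}
		 *	}
		 *
		 * plus the extra 2 MiB mapping around the current PC handled
		 * at the end of __setup_mmu.
		 */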
 824
 825@ Enable unaligned access on v6, to allow better code generation
 826@ for the decompressor C code:
 827__armv6_mmu_cache_on:
 828		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
 829		bic	r0, r0, #2		@ A (no unaligned access fault)
 830		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 831		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
 832		b	__armv4_mmu_cache_on
 833
 834__arm926ejs_mmu_cache_on:
 835#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 836		mov	r0, #4			@ put dcache in WT mode
 837		mcr	p15, 7, r0, c15, c0, 0
 838#endif
 839
 840__armv4_mmu_cache_on:
 841		mov	r12, lr
 842#ifdef CONFIG_MMU
 843		mov	r6, #CB_BITS | 0x12	@ U
 844		bl	__setup_mmu
 845		mov	r0, #0
 846		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 847		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 848		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 849		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 850		orr	r0, r0, #0x0030
 851 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 852		bl	__common_mmu_cache_on
 853		mov	r0, #0
 854		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 855#endif
 856		mov	pc, r12
 857
 858__armv7_mmu_cache_on:
 859		enable_cp15_barriers	r11
 860		mov	r12, lr
 861#ifdef CONFIG_MMU
 862		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
 863		tst	r11, #0xf		@ VMSA
 864		movne	r6, #CB_BITS | 0x02	@ !XN
 865		blne	__setup_mmu
 866		mov	r0, #0
 867		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 868		tst	r11, #0xf		@ VMSA
 869		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 870#endif
 871		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 872		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
 873		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 874		orr	r0, r0, #0x003c		@ write buffer
 875		bic	r0, r0, #2		@ A (no unaligned access fault)
 876		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 877						@ (needed for ARM1176)
 878#ifdef CONFIG_MMU
 879 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 880		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
 881		orrne	r0, r0, #1		@ MMU enabled
 882		movne	r1, #0xfffffffd		@ domain 0 = client
 883		bic     r6, r6, #1 << 31        @ 32-bit translation system
 884		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
 885		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
 886		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 887		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 888#endif
 889		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 890		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 891		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
 892		mov	r0, #0
 893		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 894		mov	pc, r12
 895
 896__fa526_cache_on:
 897		mov	r12, lr
 898		mov	r6, #CB_BITS | 0x12	@ U
 899		bl	__setup_mmu
 900		mov	r0, #0
 901		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
 902		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 903		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 904		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 905		orr	r0, r0, #0x1000		@ I-cache enable
 906		bl	__common_mmu_cache_on
 907		mov	r0, #0
 908		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 909		mov	pc, r12
 910
 911__common_mmu_cache_on:
 912#ifndef CONFIG_THUMB2_KERNEL
 913#ifndef DEBUG
 914		orr	r0, r0, #0x000d		@ Write buffer, mmu
 915#endif
 916		mov	r1, #-1
 917		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
 918		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
 919		b	1f
 920		.align	5			@ cache line aligned
 9211:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 922		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
 923		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
 924#endif
 925
 926#define PROC_ENTRY_SIZE (4*5)
 927
 928/*
 929 * Here follow the relocatable cache support functions for the
 930 * various processors.  This is a generic hook for locating an
 931 * entry and jumping to an instruction at the specified offset
 932 * from the start of the block.  Please note this is all position
 933 * independent code.
 934 *
 935 *  r1  = corrupted
 936 *  r2  = corrupted
 937 *  r3  = block offset
 938 *  r9  = corrupted
 939 *  r12 = corrupted
 940 */
 941
 942call_cache_fn:	adr	r12, proc_types
 943#ifdef CONFIG_CPU_CP15
 944		mrc	p15, 0, r9, c0, c0	@ get processor ID
 945#elif defined(CONFIG_CPU_V7M)
 946		/*
 947		 * On v7-M the processor id is located in the V7M_SCB_CPUID
 948		 * register, but as cache handling is IMPLEMENTATION DEFINED on
 949		 * v7-M (if existent at all) we just return early here.
 950		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
 951		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
 952		 * use cp15 registers that are not implemented on v7-M.
 953		 */
 954		bx	lr
 955#else
 956		ldr	r9, =CONFIG_PROCESSOR_ID
 957#endif
 9581:		ldr	r1, [r12, #0]		@ get value
 959		ldr	r2, [r12, #4]		@ get mask
 960		eor	r1, r1, r9		@ (real ^ match)
 961		tst	r1, r2			@       & mask
 962 ARM(		addeq	pc, r12, r3		) @ call cache function
 963 THUMB(		addeq	r12, r3			)
 964 THUMB(		moveq	pc, r12			) @ call cache function
 965		add	r12, r12, #PROC_ENTRY_SIZE
 966		b	1b
 967
 968/*
 969 * Table for cache operations.  This is basically:
 970 *   - CPU ID match
 971 *   - CPU ID mask
 972 *   - 'cache on' method instruction
 973 *   - 'cache off' method instruction
 974 *   - 'cache flush' method instruction
 975 *
 976 * We match an entry using: ((real_id ^ match) & mask) == 0
 977 *
 978 * Writethrough caches generally only need 'on' and 'off'
 979 * methods.  Writeback caches _must_ have the flush method
 980 * defined.
 981 */
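		/*
		 * Equivalent C for the match rule (illustrative only):
		 *
		 *	struct proc_type {
		 *		unsigned int id_match;
		 *		unsigned int id_mask;
		 *		// three branch (or return) slots follow:
		 *		// cache_on, cache_off, cache_flush
		 *	};
		 *
		 *	int cpu_matches(unsigned int cpuid,
		 *			const struct proc_type *p)
		 *	{
		 *		return ((cpuid ^ p->id_match) & p->id_mask) == 0;
		 *	}
		 *
		 * call_cache_fn walks this table and branches to the slot at
		 * the offset passed in r3 of the first matching entry.
		 */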
 982		.align	2
 983		.type	proc_types,#object
 984proc_types:
 985		.word	0x41000000		@ old ARM ID
 986		.word	0xff00f000
 987		mov	pc, lr
 988 THUMB(		nop				)
 989		mov	pc, lr
 990 THUMB(		nop				)
 991		mov	pc, lr
 992 THUMB(		nop				)
 993
 994		.word	0x41007000		@ ARM7/710
 995		.word	0xfff8fe00
 996		mov	pc, lr
 997 THUMB(		nop				)
 998		mov	pc, lr
 999 THUMB(		nop				)
1000		mov	pc, lr
1001 THUMB(		nop				)
1002
1003		.word	0x41807200		@ ARM720T (writethrough)
1004		.word	0xffffff00
1005		W(b)	__armv4_mmu_cache_on
1006		W(b)	__armv4_mmu_cache_off
1007		mov	pc, lr
1008 THUMB(		nop				)
1009
1010		.word	0x41007400		@ ARM74x
1011		.word	0xff00ff00
1012		W(b)	__armv3_mpu_cache_on
1013		W(b)	__armv3_mpu_cache_off
1014		W(b)	__armv3_mpu_cache_flush
1015		
1016		.word	0x41009400		@ ARM94x
1017		.word	0xff00ff00
1018		W(b)	__armv4_mpu_cache_on
1019		W(b)	__armv4_mpu_cache_off
1020		W(b)	__armv4_mpu_cache_flush
1021
1022		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1023		.word	0xff0ffff0
1024		W(b)	__arm926ejs_mmu_cache_on
1025		W(b)	__armv4_mmu_cache_off
1026		W(b)	__armv5tej_mmu_cache_flush
1027
1028		.word	0x00007000		@ ARM7 IDs
1029		.word	0x0000f000
1030		mov	pc, lr
1031 THUMB(		nop				)
1032		mov	pc, lr
1033 THUMB(		nop				)
1034		mov	pc, lr
1035 THUMB(		nop				)
1036
1037		@ Everything from here on will be the new ID system.
1038
1039		.word	0x4401a100		@ sa110 / sa1100
1040		.word	0xffffffe0
1041		W(b)	__armv4_mmu_cache_on
1042		W(b)	__armv4_mmu_cache_off
1043		W(b)	__armv4_mmu_cache_flush
1044
1045		.word	0x6901b110		@ sa1110
1046		.word	0xfffffff0
1047		W(b)	__armv4_mmu_cache_on
1048		W(b)	__armv4_mmu_cache_off
1049		W(b)	__armv4_mmu_cache_flush
1050
1051		.word	0x56056900
1052		.word	0xffffff00		@ PXA9xx
1053		W(b)	__armv4_mmu_cache_on
1054		W(b)	__armv4_mmu_cache_off
1055		W(b)	__armv4_mmu_cache_flush
1056
1057		.word	0x56158000		@ PXA168
1058		.word	0xfffff000
1059		W(b)	__armv4_mmu_cache_on
1060		W(b)	__armv4_mmu_cache_off
1061		W(b)	__armv5tej_mmu_cache_flush
1062
1063		.word	0x56050000		@ Feroceon
1064		.word	0xff0f0000
1065		W(b)	__armv4_mmu_cache_on
1066		W(b)	__armv4_mmu_cache_off
1067		W(b)	__armv5tej_mmu_cache_flush
1068
1069#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1070		/* this conflicts with the standard ARMv5TE entry */
1071		.long	0x41009260		@ Old Feroceon
1072		.long	0xff00fff0
1073		b	__armv4_mmu_cache_on
1074		b	__armv4_mmu_cache_off
1075		b	__armv5tej_mmu_cache_flush
1076#endif
1077
1078		.word	0x66015261		@ FA526
1079		.word	0xff01fff1
1080		W(b)	__fa526_cache_on
1081		W(b)	__armv4_mmu_cache_off
1082		W(b)	__fa526_cache_flush
1083
1084		@ These match on the architecture ID
1085
1086		.word	0x00020000		@ ARMv4T
1087		.word	0x000f0000
1088		W(b)	__armv4_mmu_cache_on
1089		W(b)	__armv4_mmu_cache_off
1090		W(b)	__armv4_mmu_cache_flush
1091
1092		.word	0x00050000		@ ARMv5TE
1093		.word	0x000f0000
1094		W(b)	__armv4_mmu_cache_on
1095		W(b)	__armv4_mmu_cache_off
1096		W(b)	__armv4_mmu_cache_flush
1097
1098		.word	0x00060000		@ ARMv5TEJ
1099		.word	0x000f0000
1100		W(b)	__armv4_mmu_cache_on
1101		W(b)	__armv4_mmu_cache_off
1102		W(b)	__armv5tej_mmu_cache_flush
1103
1104		.word	0x0007b000		@ ARMv6
1105		.word	0x000ff000
1106		W(b)	__armv6_mmu_cache_on
1107		W(b)	__armv4_mmu_cache_off
1108		W(b)	__armv6_mmu_cache_flush
1109
1110		.word	0x000f0000		@ new CPU Id
1111		.word	0x000f0000
1112		W(b)	__armv7_mmu_cache_on
1113		W(b)	__armv7_mmu_cache_off
1114		W(b)	__armv7_mmu_cache_flush
1115
1116		.word	0			@ unrecognised type
1117		.word	0
1118		mov	pc, lr
1119 THUMB(		nop				)
1120		mov	pc, lr
1121 THUMB(		nop				)
1122		mov	pc, lr
1123 THUMB(		nop				)
1124
1125		.size	proc_types, . - proc_types
1126
1127		/*
1128		 * If you get a "non-constant expression in ".if" statement"
1129		 * error from the assembler on this line, check that you have
1130		 * not accidentally written a "b" instruction where you should
1131		 * have written W(b).
1132		 */
1133		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1134		.error "The size of one or more proc_types entries is wrong."
1135		.endif
1136
1137/*
1138 * Turn off the Cache and MMU.  ARMv3 does not support
1139 * reading the control register, but ARMv4 does.
1140 *
1141 * On exit,
1142 *  r0, r1, r2, r3, r9, r12 corrupted
1143 * This routine must preserve:
1144 *  r4, r7, r8
1145 */
1146		.align	5
1147cache_off:	mov	r3, #12			@ cache_off function
1148		b	call_cache_fn
1149
1150__armv4_mpu_cache_off:
1151		mrc	p15, 0, r0, c1, c0
1152		bic	r0, r0, #0x000d
1153		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1154		mov	r0, #0
1155		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1156		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1157		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1158		mov	pc, lr
1159
1160__armv3_mpu_cache_off:
1161		mrc	p15, 0, r0, c1, c0
1162		bic	r0, r0, #0x000d
1163		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1164		mov	r0, #0
1165		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1166		mov	pc, lr
1167
1168__armv4_mmu_cache_off:
1169#ifdef CONFIG_MMU
1170		mrc	p15, 0, r0, c1, c0
1171		bic	r0, r0, #0x000d
1172		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1173		mov	r0, #0
1174		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1175		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1176#endif
1177		mov	pc, lr
1178
1179__armv7_mmu_cache_off:
1180		mrc	p15, 0, r0, c1, c0
1181#ifdef CONFIG_MMU
1182		bic	r0, r0, #0x000d
1183#else
1184		bic	r0, r0, #0x000c
1185#endif
1186		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1187		mov	r0, #0
1188#ifdef CONFIG_MMU
1189		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1190#endif
1191		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1192		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1193		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1194		mov	pc, lr
1195
1196/*
1197 * Clean and flush the cache to maintain consistency.
1198 *
1199 * On entry,
1200 *  r0 = start address
1201 *  r1 = end address (exclusive)
1202 * On exit,
1203 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1204 * This routine must preserve:
1205 *  r4, r6, r7, r8
1206 */
1207		.align	5
1208cache_clean_flush:
1209		mov	r3, #16
1210		mov	r11, r1
1211		b	call_cache_fn
1212
1213__armv4_mpu_cache_flush:
1214		tst	r4, #1
1215		movne	pc, lr
1216		mov	r2, #1
1217		mov	r3, #0
1218		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1219		mov	r1, #7 << 5		@ 8 segments
12201:		orr	r3, r1, #63 << 26	@ 64 entries
12212:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1222		subs	r3, r3, #1 << 26
1223		bcs	2b			@ entries 63 to 0
1224		subs 	r1, r1, #1 << 5
1225		bcs	1b			@ segments 7 to 0
1226
1227		teq	r2, #0
1228		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1229		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1230		mov	pc, lr
1231		
1232__fa526_cache_flush:
1233		tst	r4, #1
1234		movne	pc, lr
1235		mov	r1, #0
1236		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1237		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1238		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1239		mov	pc, lr
1240
1241__armv6_mmu_cache_flush:
1242		mov	r1, #0
1243		tst	r4, #1
1244		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1245		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1246		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1247		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1248		mov	pc, lr
1249
1250__armv7_mmu_cache_flush:
1251		enable_cp15_barriers	r10
1252		tst	r4, #1
1253		bne	iflush
1254		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1255		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1256		mov	r10, #0
1257		beq	hierarchical
1258		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1259		b	iflush
1260hierarchical:
1261		dcache_line_size r1, r2		@ r1 := dcache min line size
1262		sub	r2, r1, #1		@ r2 := line size mask
1263		bic	r0, r0, r2		@ round down start to line size
1264		sub	r11, r11, #1		@ end address is exclusive
1265		bic	r11, r11, r2		@ round down end to line size
12660:		cmp	r0, r11			@ finished?
1267		bgt	iflush
1268		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1269		add	r0, r0, r1
1270		b	0b
1271iflush:
1272		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1273		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1274		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1275		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1276		mov	pc, lr
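		/*
		 * The hierarchical clean loop above, as a C sketch
		 * (illustrative only; clean_dcache_line_by_va() stands in
		 * for the DCCIMVAC operation and is not a real helper):
		 *
		 *	void clean_range(unsigned long start, unsigned long end,
		 *			 unsigned long linesz)
		 *	{
		 *		unsigned long mask = linesz - 1;
		 *
		 *		start &= ~mask;
		 *		end = (end - 1) & ~mask;	// end is exclusive
		 *		for (; start <= end; start += linesz)
		 *			clean_dcache_line_by_va(start);
		 *	}
		 */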
1277
1278__armv5tej_mmu_cache_flush:
1279		tst	r4, #1
1280		movne	pc, lr
12811:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1282		bne	1b
1283		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1284		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1285		mov	pc, lr
1286
1287__armv4_mmu_cache_flush:
1288		tst	r4, #1
1289		movne	pc, lr
1290		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1291		mov	r11, #32		@ default: 32 byte line size
1292		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1293		teq	r3, r9			@ cache ID register present?
1294		beq	no_cache_id
1295		mov	r1, r3, lsr #18
1296		and	r1, r1, #7
1297		mov	r2, #1024
1298		mov	r2, r2, lsl r1		@ base dcache size *2
1299		tst	r3, #1 << 14		@ test M bit
1300		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1301		mov	r3, r3, lsr #12
1302		and	r3, r3, #3
1303		mov	r11, #8
1304		mov	r11, r11, lsl r3	@ cache line size in bytes
1305no_cache_id:
1306		mov	r1, pc
1307		bic	r1, r1, #63		@ align to longest cache line
1308		add	r2, r1, r2
13091:
1310 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1311 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1312 THUMB(		add     r1, r1, r11		)
1313		teq	r1, r2
1314		bne	1b
1315
1316		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1317		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1318		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1319		mov	pc, lr
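		/*
		 * The cache type decode above, in C (illustrative only, the
		 * function name is ours).  It yields the number of bytes the
		 * read-allocate loop walks (twice the D-cache size) and the
		 * line length used as the stride:
		 *
		 *	void decode_cache_type(unsigned int ctype,
		 *			       unsigned int *twice_dsize,
		 *			       unsigned int *linelen)
		 *	{
		 *		*twice_dsize = 1024U << ((ctype >> 18) & 7);
		 *		if (ctype & (1U << 14))		// M bit
		 *			*twice_dsize += *twice_dsize >> 1;
		 *		*linelen = 8U << ((ctype >> 12) & 3);
		 *	}
		 */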
1320
1321__armv3_mmu_cache_flush:
1322__armv3_mpu_cache_flush:
1323		tst	r4, #1
1324		movne	pc, lr
1325		mov	r1, #0
1326		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1327		mov	pc, lr
1328
1329/*
1330 * Various debugging routines for printing hex characters and
1331 * memory, which again must be relocatable.
1332 */
1333#ifdef DEBUG
1334		.align	2
1335		.type	phexbuf,#object
1336phexbuf:	.space	12
1337		.size	phexbuf, . - phexbuf
1338
1339@ phex corrupts {r0, r1, r2, r3}
1340phex:		adr	r3, phexbuf
1341		mov	r2, #0
1342		strb	r2, [r3, r1]
13431:		subs	r1, r1, #1
1344		movmi	r0, r3
1345		bmi	puts
1346		and	r2, r0, #15
1347		mov	r0, r0, lsr #4
1348		cmp	r2, #10
1349		addge	r2, r2, #7
1350		add	r2, r2, #'0'
1351		strb	r2, [r3, r1]
1352		b	1b
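		/*
		 * phex in C (illustrative only): format "val" as "len" hex
		 * digits into a NUL terminated buffer, least significant
		 * digit last, then print it:
		 *
		 *	void phex(unsigned int val, int len, char *buf)
		 *	{
		 *		buf[len] = '\0';
		 *		while (--len >= 0) {
		 *			unsigned int d = val & 15;
		 *
		 *			val >>= 4;
		 *			buf[len] = d < 10 ? '0' + d : 'A' + d - 10;
		 *		}
		 *		// the asm then falls through to puts(buf)
		 *	}
		 */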
1353
1354@ puts corrupts {r0, r1, r2, r3}
1355puts:		loadsp	r3, r2, r1
13561:		ldrb	r2, [r0], #1
1357		teq	r2, #0
1358		moveq	pc, lr
13592:		writeb	r2, r3
1360		mov	r1, #0x00020000
13613:		subs	r1, r1, #1
1362		bne	3b
1363		teq	r2, #'\n'
1364		moveq	r2, #'\r'
1365		beq	2b
1366		teq	r0, #0
1367		bne	1b
1368		mov	pc, lr
1369@ putc corrupts {r0, r1, r2, r3}
1370putc:
1371		mov	r2, r0
1372		loadsp	r3, r1, r0
1373		mov	r0, #0
1374		b	2b
1375
1376@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1377memdump:	mov	r12, r0
1378		mov	r10, lr
1379		mov	r11, #0
13802:		mov	r0, r11, lsl #2
1381		add	r0, r0, r12
1382		mov	r1, #8
1383		bl	phex
1384		mov	r0, #':'
1385		bl	putc
13861:		mov	r0, #' '
1387		bl	putc
1388		ldr	r0, [r12, r11, lsl #2]
1389		mov	r1, #8
1390		bl	phex
1391		and	r0, r11, #7
1392		teq	r0, #3
1393		moveq	r0, #' '
1394		bleq	putc
1395		and	r0, r11, #7
1396		add	r11, r11, #1
1397		teq	r0, #7
1398		bne	1b
1399		mov	r0, #'\n'
1400		bl	putc
1401		cmp	r11, #64
1402		blt	2b
1403		mov	pc, r10
1404#endif
1405
1406		.ltorg
1407
1408#ifdef CONFIG_ARM_VIRT_EXT
1409.align 5
1410__hyp_reentry_vectors:
1411		W(b)	.			@ reset
1412		W(b)	.			@ undef
1413#ifdef CONFIG_EFI_STUB
1414		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1415#else
1416		W(b)	.			@ svc
1417#endif
1418		W(b)	.			@ pabort
1419		W(b)	.			@ dabort
1420		W(b)	__enter_kernel		@ hyp
1421		W(b)	.			@ irq
1422		W(b)	.			@ fiq
1423#endif /* CONFIG_ARM_VIRT_EXT */
1424
1425__enter_kernel:
1426		mov	r0, #0			@ must be 0
1427		mov	r1, r7			@ restore architecture number
1428		mov	r2, r8			@ restore atags pointer
1429 ARM(		mov	pc, r4		)	@ call kernel
1430 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1431 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1432
1433reloc_code_end:
1434
1435#ifdef CONFIG_EFI_STUB
1436__enter_kernel_from_hyp:
1437		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1438		bic	r0, r0, #0x5		@ disable MMU and caches
1439		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1440		isb
1441		b	__enter_kernel
1442
1443ENTRY(efi_enter_kernel)
1444		mov	r4, r0			@ preserve image base
1445		mov	r8, r1			@ preserve DT pointer
1446
1447 ARM(		adrl	r0, call_cache_fn	)
1448 THUMB(		adr	r0, call_cache_fn	)
1449		adr	r1, 0f			@ clean the region of code we
1450		bl	cache_clean_flush	@ may run with the MMU off
1451
1452#ifdef CONFIG_ARM_VIRT_EXT
1453		@
1454		@ The EFI spec does not support booting on ARM in HYP mode,
1455		@ since it mandates that the MMU and caches are on, with all
1456		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1457		@
1458		@ While the EDK2 reference implementation adheres to this,
1459		@ U-Boot might decide to enter the EFI stub in HYP mode
1460		@ anyway, with the MMU and caches either on or off.
1461		@
1462		mrs	r0, cpsr		@ get the current mode
1463		msr	spsr_cxsf, r0		@ record boot mode
1464		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1465		cmp	r0, #HYP_MODE
1466		bne	.Lefi_svc
1467
1468		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1469		tst	r1, #0x1		@ MMU enabled at HYP?
1470		beq	1f
1471
1472		@
1473		@ When running in HYP mode with the caches on, we're better
1474		@ off just carrying on using the cached 1:1 mapping that the
1475		@ firmware provided. Set up the HYP vectors so HVC instructions
1476		@ issued from HYP mode take us to the correct handler code. We
1477		@ will disable the MMU before jumping to the kernel proper.
1478		@
1479		adr	r0, __hyp_reentry_vectors
1480		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1481		isb
1482		b	.Lefi_hyp
1483
1484		@
1485		@ When running in HYP mode with the caches off, we need to drop
1486		@ into SVC mode now, and let the decompressor set up its cached
1487		@ 1:1 mapping as usual.
1488		@
14891:		mov	r9, r4			@ preserve image base
1490		bl	__hyp_stub_install	@ install HYP stub vectors
1491		safe_svcmode_maskall	r1	@ drop to SVC mode
1492		msr	spsr_cxsf, r0		@ record boot mode
1493		orr	r4, r9, #1		@ restore image base and set LSB
1494		b	.Lefi_hyp
1495.Lefi_svc:
1496#endif
1497		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1498		tst	r0, #0x1		@ MMU enabled?
1499		orreq	r4, r4, #1		@ set LSB if not
1500
1501.Lefi_hyp:
1502		mov	r0, r8			@ DT start
1503		add	r1, r8, r2		@ DT end
1504		bl	cache_clean_flush
1505
1506		adr	r0, 0f			@ switch to our stack
1507		ldr	sp, [r0]
1508		add	sp, sp, r0
1509
1510		mov	r5, #0			@ appended DTB size
1511		mov	r7, #0xFFFFFFFF		@ machine ID
1512		b	wont_overwrite
1513ENDPROC(efi_enter_kernel)
15140:		.long	.L_user_stack_end - .
1515#endif
1516
1517		.align
1518		.section ".stack", "aw", %nobits
1519.L_user_stack:	.space	4096
1520.L_user_stack_end: