@ arch/arm/boot/compressed/head.S — zImage decompressor entry code
@ (web-viewer banner text removed; see SPDX header below)
   1/* SPDX-License-Identifier: GPL-2.0-only */
   2/*
   3 *  linux/arch/arm/boot/compressed/head.S
   4 *
   5 *  Copyright (C) 1996-2002 Russell King
   6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
   7 */
   8#include <linux/linkage.h>
   9#include <asm/assembler.h>
  10#include <asm/v7m.h>
  11
  12#include "efi-header.S"
  13
  14#ifdef __ARMEB__
  15#define OF_DT_MAGIC 0xd00dfeed
  16#else
  17#define OF_DT_MAGIC 0xedfe0dd0
  18#endif
  19
  20 AR_CLASS(	.arch	armv7-a	)
  21 M_CLASS(	.arch	armv7-m	)
  22
  23/*
  24 * Debugging stuff
  25 *
  26 * Note that these macros must not contain any code which is not
  27 * 100% relocatable.  Any attempt to do so will result in a crash.
  28 * Please select one of the following when turning on debugging.
  29 */
  30#ifdef DEBUG
  31
  32#if defined(CONFIG_DEBUG_ICEDCC)
  33
  34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
  35		.macro	loadsp, rb, tmp1, tmp2
  36		.endm
  37		.macro	writeb, ch, rb, tmp
  38		mcr	p14, 0, \ch, c0, c5, 0
  39		.endm
  40#elif defined(CONFIG_CPU_XSCALE)
  41		.macro	loadsp, rb, tmp1, tmp2
  42		.endm
  43		.macro	writeb, ch, rb, tmp
  44		mcr	p14, 0, \ch, c8, c0, 0
  45		.endm
  46#else
  47		.macro	loadsp, rb, tmp1, tmp2
  48		.endm
  49		.macro	writeb, ch, rb, tmp
  50		mcr	p14, 0, \ch, c1, c0, 0
  51		.endm
  52#endif
  53
  54#else
  55
  56#include CONFIG_DEBUG_LL_INCLUDE
  57
  58		.macro	writeb,	ch, rb, tmp
  59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
  60		waituartcts \tmp, \rb
  61#endif
  62		waituarttxrdy \tmp, \rb
  63		senduart \ch, \rb
  64		busyuart \tmp, \rb
  65		.endm
  66
  67#if defined(CONFIG_ARCH_SA1100)
  68		.macro	loadsp, rb, tmp1, tmp2
  69		mov	\rb, #0x80000000	@ physical base address
  70		add	\rb, \rb, #0x00010000	@ Ser1
  71		.endm
  72#else
  73		.macro	loadsp,	rb, tmp1, tmp2
  74		addruart \rb, \tmp1, \tmp2
  75		.endm
  76#endif
  77#endif
  78#endif
  79
		/* Print one character via putc (clobbers r0) */
		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		/* Print \val as \len hex digits via phex (clobbers r0, r1) */
		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm
  90
		/*
		 * Debug kernel copy by printing the memory addresses involved
		 */
		.macro dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
		kputc   #'C'
		kputc   #':'
		kputc   #'0'
		kputc   #'x'
		kphex   \begin, 8	/* Start of compressed kernel */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\end, 8		/* End of compressed kernel */
		kputc	#'-'
		kputc	#'>'
		kputc   #'0'
		kputc   #'x'
		kphex   \cbegin, 8	/* Start of kernel copy */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\cend, 8	/* End of kernel copy */
		kputc	#'\n'
#endif
		.endm

		/*
		 * Debug print of the final appended DTB location
		 */
		.macro dbgadtb, begin, size
#ifdef DEBUG
		kputc   #'D'
		kputc   #'T'
		kputc   #'B'
		kputc   #':'
		kputc   #'0'
		kputc   #'x'
		kphex   \begin, 8	/* Start of appended DTB */
		kputc	#' '
		kputc	#'('
		kputc	#'0'
		kputc	#'x'
		kphex	\size, 8	/* Size of appended DTB */
		kputc	#')'
		kputc	#'\n'
#endif
		.endm
 139
 140		.macro	enable_cp15_barriers, reg
 141		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
 142		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
 143		bne	.L_\@
 144		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
 145		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 146 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
 147 THUMB(		isb						)
 148.L_\@:
 149		.endm
 150
 151		/*
 152		 * The kernel build system appends the size of the
 153		 * decompressed kernel at the end of the compressed data
 154		 * in little-endian form.
 155		 */
 156		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
 157		adr	\res, .Linflated_image_size_offset
 158		ldr	\tmp1, [\res]
 159		add	\tmp1, \tmp1, \res	@ address of inflated image size
 160
 161		ldrb	\res, [\tmp1]		@ get_unaligned_le32
 162		ldrb	\tmp2, [\tmp1, #1]
 163		orr	\res, \res, \tmp2, lsl #8
 164		ldrb	\tmp2, [\tmp1, #2]
 165		ldrb	\tmp1, [\tmp1, #3]
 166		orr	\res, \res, \tmp2, lsl #16
 167		orr	\res, \res, \tmp1, lsl #24
 168		.endm
 169
 170		.macro	be32tocpu, val, tmp
 171#ifndef __ARMEB__
 172		/* convert to little endian */
 173		rev_l	\val, \tmp
 174#endif
 175		.endm
 176
 177		.section ".start", "ax"
 178/*
 179 * sort out different calling conventions
 180 */
 181		.align
 182		/*
 183		 * Always enter in ARM state for CPUs that support the ARM ISA.
 184		 * As of today (2014) that's exactly the members of the A and R
 185		 * classes.
 186		 */
 187 AR_CLASS(	.arm	)
 188start:
 189		.type	start,#function
 190		/*
 191		 * These 7 nops along with the 1 nop immediately below for
 192		 * !THUMB2 form 8 nops that make the compressed kernel bootable
 193		 * on legacy ARM systems that were assuming the kernel in a.out
 194		 * binary format. The boot loaders on these systems would
 195		 * jump 32 bytes into the image to skip the a.out header.
 196		 * with these 8 nops filling exactly 32 bytes, things still
 197		 * work as expected on these legacy systems. Thumb2 mode keeps
 198		 * 7 of the nops as it turns out that some boot loaders
 199		 * were patching the initial instructions of the kernel, i.e
 200		 * had started to exploit this "patch area".
 201		 */
 202		__initial_nops
 203		.rept	5
 204		__nop
 205		.endr
 206#ifndef CONFIG_THUMB2_KERNEL
 207		__nop
 208#else
 209 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
 210  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
 211		.thumb
 212#endif
 213		W(b)	1f
 214
 215		.word	_magic_sig	@ Magic numbers to help the loader
 216		.word	_magic_start	@ absolute load/run zImage address
 217		.word	_magic_end	@ zImage end address
 218		.word	0x04030201	@ endianness flag
 219		.word	0x45454545	@ another magic number to indicate
 220		.word	_magic_table	@ additional data table
 221
 222		__EFI_HEADER
 2231:
 224 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 225 AR_CLASS(	mrs	r9, cpsr	)
 226#ifdef CONFIG_ARM_VIRT_EXT
 227		bl	__hyp_stub_install	@ get into SVC mode, reversibly
 228#endif
 229		mov	r7, r1			@ save architecture ID
 230		mov	r8, r2			@ save atags pointer
 231
 232#ifndef CONFIG_CPU_V7M
 233		/*
 234		 * Booting from Angel - need to enter SVC mode and disable
 235		 * FIQs/IRQs (numeric definitions from angel arm.h source).
 236		 * We only do this if we were in user mode on entry.
 237		 */
 238		mrs	r2, cpsr		@ get current mode
 239		tst	r2, #3			@ not user?
 240		bne	not_angel
 241		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 242 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 243 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
 244not_angel:
 245		safe_svcmode_maskall r0
 246		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
 247						@ SPSR
 248#endif
 249		/*
 250		 * Note that some cache flushing and other stuff may
 251		 * be needed here - is there an Angel SWI call for this?
 252		 */
 253
 254		/*
 255		 * some architecture specific code can be inserted
 256		 * by the linker here, but it should preserve r7, r8, and r9.
 257		 */
 258
 259		.text
 260
 261#ifdef CONFIG_AUTO_ZRELADDR
 262		/*
 263		 * Find the start of physical memory.  As we are executing
 264		 * without the MMU on, we are in the physical address space.
 265		 * We just need to get rid of any offset by aligning the
 266		 * address.
 267		 *
 268		 * This alignment is a balance between the requirements of
 269		 * different platforms - we have chosen 128MB to allow
 270		 * platforms which align the start of their physical memory
 271		 * to 128MB to use this feature, while allowing the zImage
 272		 * to be placed within the first 128MB of memory on other
 273		 * platforms.  Increasing the alignment means we place
 274		 * stricter alignment requirements on the start of physical
 275		 * memory, but relaxing it means that we break people who
 276		 * are already placing their zImage in (eg) the top 64MB
 277		 * of this range.
 278		 */
 279		mov	r0, pc
 280		and	r0, r0, #0xf8000000
 281#ifdef CONFIG_USE_OF
 282		adr	r1, LC1
 283#ifdef CONFIG_ARM_APPENDED_DTB
 284		/*
 285		 * Look for an appended DTB.  If found, we cannot use it to
 286		 * validate the calculated start of physical memory, as its
 287		 * memory nodes may need to be augmented by ATAGS stored at
 288		 * an offset from the same start of physical memory.
 289		 */
 290		ldr	r2, [r1, #4]	@ get &_edata
 291		add	r2, r2, r1	@ relocate it
 292		ldr	r2, [r2]	@ get DTB signature
 293		ldr	r3, =OF_DT_MAGIC
 294		cmp	r2, r3		@ do we have a DTB there?
 295		beq	1f		@ if yes, skip validation
 296#endif /* CONFIG_ARM_APPENDED_DTB */
 297
 298		/*
 299		 * Make sure we have some stack before calling C code.
 300		 * No GOT fixup has occurred yet, but none of the code we're
 301		 * about to call uses any global variables.
 302		 */
 303		ldr	sp, [r1]	@ get stack location
 304		add	sp, sp, r1	@ apply relocation
 305
 306		/* Validate calculated start against passed DTB */
 307		mov	r1, r8
 308		bl	fdt_check_mem_start
 3091:
 310#endif /* CONFIG_USE_OF */
 311		/* Determine final kernel image address. */
 312		add	r4, r0, #TEXT_OFFSET
 313#else
 314		ldr	r4, =zreladdr
 315#endif
 316
 317		/*
 318		 * Set up a page table only if it won't overwrite ourself.
 319		 * That means r4 < pc || r4 - 16k page directory > &_end.
 320		 * Given that r4 > &_end is most unfrequent, we add a rough
 321		 * additional 1MB of room for a possible appended DTB.
 322		 */
 323		mov	r0, pc
 324		cmp	r0, r4
 325		ldrcc	r0, .Lheadroom
 326		addcc	r0, r0, pc
 327		cmpcc	r4, r0
 328		orrcc	r4, r4, #1		@ remember we skipped cache_on
 329		blcs	cache_on
 330
 331restart:	adr	r0, LC1
 332		ldr	sp, [r0]
 333		ldr	r6, [r0, #4]
 334		add	sp, sp, r0
 335		add	r6, r6, r0
 336
 337		get_inflated_image_size	r9, r10, lr
 338
 339#ifndef CONFIG_ZBOOT_ROM
 340		/* malloc space is above the relocated stack (64k max) */
 341		add	r10, sp, #MALLOC_SIZE
 342#else
 343		/*
 344		 * With ZBOOT_ROM the bss/stack is non relocatable,
 345		 * but someone could still run this code from RAM,
 346		 * in which case our reference is _edata.
 347		 */
 348		mov	r10, r6
 349#endif
 350
 351		mov	r5, #0			@ init dtb size to 0
 352#ifdef CONFIG_ARM_APPENDED_DTB
 353/*
 354 *   r4  = final kernel address (possibly with LSB set)
 355 *   r5  = appended dtb size (still unknown)
 356 *   r6  = _edata
 357 *   r7  = architecture ID
 358 *   r8  = atags/device tree pointer
 359 *   r9  = size of decompressed image
 360 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 361 *   sp  = stack pointer
 362 *
 363 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 364 * dtb data will get relocated along with the kernel if necessary.
 365 */
 366
 367		ldr	lr, [r6, #0]
 368		ldr	r1, =OF_DT_MAGIC
 369		cmp	lr, r1
 370		bne	dtb_check_done		@ not found
 371
 372#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
 373		/*
 374		 * OK... Let's do some funky business here.
 375		 * If we do have a DTB appended to zImage, and we do have
 376		 * an ATAG list around, we want the later to be translated
 377		 * and folded into the former here. No GOT fixup has occurred
 378		 * yet, but none of the code we're about to call uses any
 379		 * global variable.
 380		*/
 381
 382		/* Get the initial DTB size */
 383		ldr	r5, [r6, #4]
 384		be32tocpu r5, r1
 385		dbgadtb	r6, r5
 386		/* 50% DTB growth should be good enough */
 387		add	r5, r5, r5, lsr #1
 388		/* preserve 64-bit alignment */
 389		add	r5, r5, #7
 390		bic	r5, r5, #7
 391		/* clamp to 32KB min and 1MB max */
 392		cmp	r5, #(1 << 15)
 393		movlo	r5, #(1 << 15)
 394		cmp	r5, #(1 << 20)
 395		movhi	r5, #(1 << 20)
 396		/* temporarily relocate the stack past the DTB work space */
 397		add	sp, sp, r5
 398
 399		mov	r0, r8
 400		mov	r1, r6
 401		mov	r2, r5
 402		bl	atags_to_fdt
 403
 404		/*
 405		 * If returned value is 1, there is no ATAG at the location
 406		 * pointed by r8.  Try the typical 0x100 offset from start
 407		 * of RAM and hope for the best.
 408		 */
 409		cmp	r0, #1
 410		sub	r0, r4, #TEXT_OFFSET
 411		bic	r0, r0, #1
 412		add	r0, r0, #0x100
 413		mov	r1, r6
 414		mov	r2, r5
 415		bleq	atags_to_fdt
 416
 417		sub	sp, sp, r5
 418#endif
 419
 420		mov	r8, r6			@ use the appended device tree
 421
 422		/*
 423		 * Make sure that the DTB doesn't end up in the final
 424		 * kernel's .bss area. To do so, we adjust the decompressed
 425		 * kernel size to compensate if that .bss size is larger
 426		 * than the relocated code.
 427		 */
 428		ldr	r5, =_kernel_bss_size
 429		adr	r1, wont_overwrite
 430		sub	r1, r6, r1
 431		subs	r1, r5, r1
 432		addhi	r9, r9, r1
 433
 434		/* Get the current DTB size */
 435		ldr	r5, [r6, #4]
 436		be32tocpu r5, r1
 437
 438		/* preserve 64-bit alignment */
 439		add	r5, r5, #7
 440		bic	r5, r5, #7
 441
 442		/* relocate some pointers past the appended dtb */
 443		add	r6, r6, r5
 444		add	r10, r10, r5
 445		add	sp, sp, r5
 446dtb_check_done:
 447#endif
 448
 449/*
 450 * Check to see if we will overwrite ourselves.
 451 *   r4  = final kernel address (possibly with LSB set)
 452 *   r9  = size of decompressed image
 453 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
 454 * We basically want:
 455 *   r4 - 16k page directory >= r10 -> OK
 456 *   r4 + image length <= address of wont_overwrite -> OK
 457 * Note: the possible LSB in r4 is harmless here.
 458 */
 459		add	r10, r10, #16384
 460		cmp	r4, r10
 461		bhs	wont_overwrite
 462		add	r10, r4, r9
 463		adr	r9, wont_overwrite
 464		cmp	r10, r9
 465		bls	wont_overwrite
 466
 467/*
 468 * Relocate ourselves past the end of the decompressed kernel.
 469 *   r6  = _edata
 470 *   r10 = end of the decompressed kernel
 471 * Because we always copy ahead, we need to do it from the end and go
 472 * backward in case the source and destination overlap.
 473 */
 474		/*
 475		 * Bump to the next 256-byte boundary with the size of
 476		 * the relocation code added. This avoids overwriting
 477		 * ourself when the offset is small.
 478		 */
 479		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
 480		bic	r10, r10, #255
 481
 482		/* Get start of code we want to copy and align it down. */
 483		adr	r5, restart
 484		bic	r5, r5, #31
 485
 486/* Relocate the hyp vector base if necessary */
 487#ifdef CONFIG_ARM_VIRT_EXT
 488		mrs	r0, spsr
 489		and	r0, r0, #MODE_MASK
 490		cmp	r0, #HYP_MODE
 491		bne	1f
 492
 493		/*
 494		 * Compute the address of the hyp vectors after relocation.
 495		 * Call __hyp_set_vectors with the new address so that we
 496		 * can HVC again after the copy.
 497		 */
 498		adr_l	r0, __hyp_stub_vectors
 499		sub	r0, r0, r5
 500		add	r0, r0, r10
 501		bl	__hyp_set_vectors
 5021:
 503#endif
 504
 505		sub	r9, r6, r5		@ size to copy
 506		add	r9, r9, #31		@ rounded up to a multiple
 507		bic	r9, r9, #31		@ ... of 32 bytes
 508		add	r6, r9, r5
 509		add	r9, r9, r10
 510
 511#ifdef DEBUG
 512		sub     r10, r6, r5
 513		sub     r10, r9, r10
 514		/*
 515		 * We are about to copy the kernel to a new memory area.
 516		 * The boundaries of the new memory area can be found in
 517		 * r10 and r9, whilst r5 and r6 contain the boundaries
 518		 * of the memory we are going to copy.
 519		 * Calling dbgkc will help with the printing of this
 520		 * information.
 521		 */
 522		dbgkc	r5, r6, r10, r9
 523#endif
 524
 5251:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
 526		cmp	r6, r5
 527		stmdb	r9!, {r0 - r3, r10 - r12, lr}
 528		bhi	1b
 529
 530		/* Preserve offset to relocated code. */
 531		sub	r6, r9, r6
 532
 533		mov	r0, r9			@ start of relocated zImage
 534		add	r1, sp, r6		@ end of relocated zImage
 535		bl	cache_clean_flush
 536
 537		badr	r0, restart
 538		add	r0, r0, r6
 539		mov	pc, r0
 540
 541wont_overwrite:
 542		adr	r0, LC0
 543		ldmia	r0, {r1, r2, r3, r11, r12}
 544		sub	r0, r0, r1		@ calculate the delta offset
 545
 546/*
 547 * If delta is zero, we are running at the address we were linked at.
 548 *   r0  = delta
 549 *   r2  = BSS start
 550 *   r3  = BSS end
 551 *   r4  = kernel execution address (possibly with LSB set)
 552 *   r5  = appended dtb size (0 if not present)
 553 *   r7  = architecture ID
 554 *   r8  = atags pointer
 555 *   r11 = GOT start
 556 *   r12 = GOT end
 557 *   sp  = stack pointer
 558 */
 559		orrs	r1, r0, r5
 560		beq	not_relocated
 561
 562		add	r11, r11, r0
 563		add	r12, r12, r0
 564
 565#ifndef CONFIG_ZBOOT_ROM
 566		/*
 567		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
 568		 * we need to fix up pointers into the BSS region.
 569		 * Note that the stack pointer has already been fixed up.
 570		 */
 571		add	r2, r2, r0
 572		add	r3, r3, r0
 573
 574		/*
 575		 * Relocate all entries in the GOT table.
 576		 * Bump bss entries to _edata + dtb size
 577		 */
 5781:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 579		add	r1, r1, r0		@ This fixes up C references
 580		cmp	r1, r2			@ if entry >= bss_start &&
 581		cmphs	r3, r1			@       bss_end > entry
 582		addhi	r1, r1, r5		@    entry += dtb size
 583		str	r1, [r11], #4		@ next entry
 584		cmp	r11, r12
 585		blo	1b
 586
 587		/* bump our bss pointers too */
 588		add	r2, r2, r5
 589		add	r3, r3, r5
 590
 591#else
 592
 593		/*
 594		 * Relocate entries in the GOT table.  We only relocate
 595		 * the entries that are outside the (relocated) BSS region.
 596		 */
 5971:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
 598		cmp	r1, r2			@ entry < bss_start ||
 599		cmphs	r3, r1			@ _end < entry
 600		addlo	r1, r1, r0		@ table.  This fixes up the
 601		str	r1, [r11], #4		@ C references.
 602		cmp	r11, r12
 603		blo	1b
 604#endif
 605
 606not_relocated:	mov	r0, #0
 6071:		str	r0, [r2], #4		@ clear bss
 608		str	r0, [r2], #4
 609		str	r0, [r2], #4
 610		str	r0, [r2], #4
 611		cmp	r2, r3
 612		blo	1b
 613
 614		/*
 615		 * Did we skip the cache setup earlier?
 616		 * That is indicated by the LSB in r4.
 617		 * Do it now if so.
 618		 */
 619		tst	r4, #1
 620		bic	r4, r4, #1
 621		blne	cache_on
 622
 623/*
 624 * The C runtime environment should now be setup sufficiently.
 625 * Set up some pointers, and start decompressing.
 626 *   r4  = kernel execution address
 627 *   r7  = architecture ID
 628 *   r8  = atags pointer
 629 */
 630		mov	r0, r4
 631		mov	r1, sp			@ malloc space above stack
 632		add	r2, sp, #MALLOC_SIZE	@ 64k max
 633		mov	r3, r7
 634		bl	decompress_kernel
 635
 636		get_inflated_image_size	r1, r2, r3
 637
 638		mov	r0, r4			@ start of inflated image
 639		add	r1, r1, r0		@ end of inflated image
 640		bl	cache_clean_flush
 641		bl	cache_off
 642
 643#ifdef CONFIG_ARM_VIRT_EXT
 644		mrs	r0, spsr		@ Get saved CPU boot mode
 645		and	r0, r0, #MODE_MASK
 646		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
 647		bne	__enter_kernel		@ boot kernel directly
 648
 649		adr_l	r0, __hyp_reentry_vectors
 650		bl	__hyp_set_vectors
 651		__HVC(0)			@ otherwise bounce to hyp mode
 652
 653		b	.			@ should never be reached
 654#else
 655		b	__enter_kernel
 656#endif
 657
 658		.align	2
 659		.type	LC0, #object
 660LC0:		.word	LC0			@ r1
 661		.word	__bss_start		@ r2
 662		.word	_end			@ r3
 663		.word	_got_start		@ r11
 664		.word	_got_end		@ ip
 665		.size	LC0, . - LC0
 666
 667		.type	LC1, #object
 668LC1:		.word	.L_user_stack_end - LC1	@ sp
 669		.word	_edata - LC1		@ r6
 670		.size	LC1, . - LC1
 671
 672.Lheadroom:
 673		.word	_end - restart + 16384 + 1024*1024
 674
 675.Linflated_image_size_offset:
 676		.long	(input_data_end - 4) - .
 677
 678#ifdef CONFIG_ARCH_RPC
 679		.globl	params
 680params:		ldr	r0, =0x10000100		@ params_phys for RPC
 681		mov	pc, lr
 682		.ltorg
 683		.align
 684#endif
 685
 686/*
 687 * dcache_line_size - get the minimum D-cache line size from the CTR register
 688 * on ARMv7.
 689 */
 690		.macro	dcache_line_size, reg, tmp
 691#ifdef CONFIG_CPU_V7M
 692		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 693		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
 694		ldr	\tmp, [\tmp]
 695#else
 696		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
 697#endif
 698		lsr	\tmp, \tmp, #16
 699		and	\tmp, \tmp, #0xf		@ cache line size encoding
 700		mov	\reg, #4			@ bytes per word
 701		mov	\reg, \reg, lsl \tmp		@ actual cache line size
 702		.endm
 703
 704/*
 705 * Turn on the cache.  We need to setup some page tables so that we
 706 * can have both the I and D caches on.
 707 *
 708 * We place the page tables 16k down from the kernel execution address,
 709 * and we hope that nothing else is using it.  If we're using it, we
 710 * will go pop!
 711 *
 712 * On entry,
 713 *  r4 = kernel execution address
 714 *  r7 = architecture number
 715 *  r8 = atags pointer
 716 * On exit,
 717 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 718 * This routine must preserve:
 719 *  r4, r7, r8
 720 */
 721		.align	5
 722cache_on:	mov	r3, #8			@ cache_on function
 723		b	call_cache_fn
 724
 725/*
 726 * Initialize the highest priority protection region, PR7
 727 * to cover all 32bit address and cacheable and bufferable.
 728 */
 729__armv4_mpu_cache_on:
 730		mov	r0, #0x3f		@ 4G, the whole
 731		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 732		mcr 	p15, 0, r0, c6, c7, 1
 733
 734		mov	r0, #0x80		@ PR7
 735		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
 736		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
 737		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 738
 739		mov	r0, #0xc000
 740		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
 741		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
 742
 743		mov	r0, #0
 744		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 745		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 746		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 747		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 748						@ ...I .... ..D. WC.M
 749		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
 750		orr	r0, r0, #0x1000		@ ...1 .... .... ....
 751
 752		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 753
 754		mov	r0, #0
 755		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
 756		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
 757		mov	pc, lr
 758
 759__armv3_mpu_cache_on:
 760		mov	r0, #0x3f		@ 4G, the whole
 761		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
 762
 763		mov	r0, #0x80		@ PR7
 764		mcr	p15, 0, r0, c2, c0, 0	@ cache on
 765		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
 766
 767		mov	r0, #0xc000
 768		mcr	p15, 0, r0, c5, c0, 0	@ access permission
 769
 770		mov	r0, #0
 771		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 772		/*
 773		 * ?? ARMv3 MMU does not allow reading the control register,
 774		 * does this really work on ARMv3 MPU?
 775		 */
 776		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 777						@ .... .... .... WC.M
 778		orr	r0, r0, #0x000d		@ .... .... .... 11.1
 779		/* ?? this overwrites the value constructed above? */
 780		mov	r0, #0
 781		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
 782
 783		/* ?? invalidate for the second time? */
 784		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
 785		mov	pc, lr
 786
 787#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 788#define CB_BITS 0x08
 789#else
 790#define CB_BITS 0x0c
 791#endif
 792
 793__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
 794		bic	r3, r3, #0xff		@ Align the pointer
 795		bic	r3, r3, #0x3f00
 796/*
 797 * Initialise the page tables, turning on the cacheable and bufferable
 798 * bits for the RAM area only.
 799 */
 800		mov	r0, r3
 801		mov	r9, r0, lsr #18
 802		mov	r9, r9, lsl #18		@ start of RAM
 803		add	r10, r9, #0x10000000	@ a reasonable RAM size
 804		mov	r1, #0x12		@ XN|U + section mapping
 805		orr	r1, r1, #3 << 10	@ AP=11
 806		add	r2, r3, #16384
 8071:		cmp	r1, r9			@ if virt > start of RAM
 808		cmphs	r10, r1			@   && end of RAM > virt
 809		bic	r1, r1, #0x1c		@ clear XN|U + C + B
 810		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
 811		orrhs	r1, r1, r6		@ set RAM section settings
 812		str	r1, [r0], #4		@ 1:1 mapping
 813		add	r1, r1, #1048576
 814		teq	r0, r2
 815		bne	1b
 816/*
 817 * If ever we are running from Flash, then we surely want the cache
 818 * to be enabled also for our execution instance...  We map 2MB of it
 819 * so there is no map overlap problem for up to 1 MB compressed kernel.
 820 * If the execution is in RAM then we would only be duplicating the above.
 821 */
 822		orr	r1, r6, #0x04		@ ensure B is set for this
 823		orr	r1, r1, #3 << 10
 824		mov	r2, pc
 825		mov	r2, r2, lsr #20
 826		orr	r1, r1, r2, lsl #20
 827		add	r0, r3, r2, lsl #2
 828		str	r1, [r0], #4
 829		add	r1, r1, #1048576
 830		str	r1, [r0]
 831		mov	pc, lr
 832ENDPROC(__setup_mmu)
 833
 834@ Enable unaligned access on v6, to allow better code generation
 835@ for the decompressor C code:
 836__armv6_mmu_cache_on:
 837		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
 838		bic	r0, r0, #2		@ A (no unaligned access fault)
 839		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 840		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
 841		b	__armv4_mmu_cache_on
 842
 843__arm926ejs_mmu_cache_on:
 844#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
 845		mov	r0, #4			@ put dcache in WT mode
 846		mcr	p15, 7, r0, c15, c0, 0
 847#endif
 848
 849__armv4_mmu_cache_on:
 850		mov	r12, lr
 851#ifdef CONFIG_MMU
 852		mov	r6, #CB_BITS | 0x12	@ U
 853		bl	__setup_mmu
 854		mov	r0, #0
 855		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 856		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 857		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 858		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 859		orr	r0, r0, #0x0030
 860 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 861		bl	__common_mmu_cache_on
 862		mov	r0, #0
 863		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 864#endif
 865		mov	pc, r12
 866
 867__armv7_mmu_cache_on:
 868		enable_cp15_barriers	r11
 869		mov	r12, lr
 870#ifdef CONFIG_MMU
 871		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
 872		tst	r11, #0xf		@ VMSA
 873		movne	r6, #CB_BITS | 0x02	@ !XN
 874		blne	__setup_mmu
 875		mov	r0, #0
 876		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 877		tst	r11, #0xf		@ VMSA
 878		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
 879#endif
 880		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 881		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
 882		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
 883		orr	r0, r0, #0x003c		@ write buffer
 884		bic	r0, r0, #2		@ A (no unaligned access fault)
 885		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
 886						@ (needed for ARM1176)
 887#ifdef CONFIG_MMU
 888 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
 889		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
 890		orrne	r0, r0, #1		@ MMU enabled
 891		movne	r1, #0xfffffffd		@ domain 0 = client
 892		bic     r6, r6, #1 << 31        @ 32-bit translation system
 893		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
 894		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
 895		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 896		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
 897#endif
 898		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 899		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 900		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
 901		mov	r0, #0
 902		mcr	p15, 0, r0, c7, c5, 4	@ ISB
 903		mov	pc, r12
 904
 905__fa526_cache_on:
 906		mov	r12, lr
 907		mov	r6, #CB_BITS | 0x12	@ U
 908		bl	__setup_mmu
 909		mov	r0, #0
 910		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
 911		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
 912		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 913		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
 914		orr	r0, r0, #0x1000		@ I-cache enable
 915		bl	__common_mmu_cache_on
 916		mov	r0, #0
 917		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
 918		mov	pc, r12
 919
 920__common_mmu_cache_on:
 921#ifndef CONFIG_THUMB2_KERNEL
 922#ifndef DEBUG
 923		orr	r0, r0, #0x000d		@ Write buffer, mmu
 924#endif
 925		mov	r1, #-1
 926		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
 927		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
 928		b	1f
 929		.align	5			@ cache line aligned
 9301:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
 931		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
 932		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
 933#endif
 934
 935#define PROC_ENTRY_SIZE (4*5)
 936
 937/*
 938 * Here follow the relocatable cache support functions for the
 939 * various processors.  This is a generic hook for locating an
 940 * entry and jumping to an instruction at the specified offset
 941 * from the start of the block.  Please note this is all position
 942 * independent code.
 943 *
 944 *  r1  = corrupted
 945 *  r2  = corrupted
 946 *  r3  = block offset
 947 *  r9  = corrupted
 948 *  r12 = corrupted
 949 */
 950
 951call_cache_fn:	adr	r12, proc_types
 952#ifdef CONFIG_CPU_CP15
 953		mrc	p15, 0, r9, c0, c0	@ get processor ID
 954#elif defined(CONFIG_CPU_V7M)
 955		/*
 956		 * On v7-M the processor id is located in the V7M_SCB_CPUID
 957		 * register, but as cache handling is IMPLEMENTATION DEFINED on
 958		 * v7-M (if existant at all) we just return early here.
 959		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
 960		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
 961		 * use cp15 registers that are not implemented on v7-M.
 962		 */
 963		bx	lr
 964#else
 965		ldr	r9, =CONFIG_PROCESSOR_ID
 966#endif
 9671:		ldr	r1, [r12, #0]		@ get value
 968		ldr	r2, [r12, #4]		@ get mask
 969		eor	r1, r1, r9		@ (real ^ match)
 970		tst	r1, r2			@       & mask
 971 ARM(		addeq	pc, r12, r3		) @ call cache function
 972 THUMB(		addeq	r12, r3			)
 973 THUMB(		moveq	pc, r12			) @ call cache function
 974		add	r12, r12, #PROC_ENTRY_SIZE
 975		b	1b
 976
 977/*
 978 * Table for cache operations.  This is basically:
 979 *   - CPU ID match
 980 *   - CPU ID mask
 981 *   - 'cache on' method instruction
 982 *   - 'cache off' method instruction
 983 *   - 'cache flush' method instruction
 984 *
 985 * We match an entry using: ((real_id ^ match) & mask) == 0
 986 *
 987 * Writethrough caches generally only need 'on' and 'off'
 988 * methods.  Writeback caches _must_ have the flush method
 989 * defined.
 990 */
/*
 * Per-processor cache-method table, searched by call_cache_fn.
 *
 * Each entry is exactly PROC_ENTRY_SIZE bytes:
 *	.word	CPU ID value to match
 *	.word	CPU ID mask
 *	4-byte slot: cache_on  method (offset  8)
 *	4-byte slot: cache_off method (offset 12)
 *	4-byte slot: cache_flush method (offset 16)
 *
 * W(b) forces the 32-bit branch encoding under Thumb-2 so every slot
 * stays 4 bytes; no-op 'mov pc, lr' slots are padded with a Thumb-only
 * nop for the same reason.  The .if check after the table enforces the
 * fixed entry size at assembly time.
 */
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41000000		@ old ARM ID
		.word	0xff00f000
		mov	pc, lr			@ cache_on:    no-op
 THUMB(		nop				)
		mov	pc, lr			@ cache_off:   no-op
 THUMB(		nop				)
		mov	pc, lr			@ cache_flush: no-op
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr			@ writethrough: no flush needed
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
		.word	0xff0ffff0
		W(b)	__arm926ejs_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv6_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

		/*
		 * If you get a "non-constant expression in ".if" statement"
		 * error from the assembler on this line, check that you have
		 * not accidentally written a "b" instruction where you should
		 * have written W(b).
		 */
		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
		.error "The size of one or more proc_types entries is wrong."
		.endif
1145
1146/*
1147 * Turn off the Cache and MMU.  ARMv3 does not support
1148 * reading the control register, but ARMv4 does.
1149 *
1150 * On exit,
1151 *  r0, r1, r2, r3, r9, r12 corrupted
1152 * This routine must preserve:
1153 *  r4, r7, r8
1154 */
		.align	5
@ Select the cache_off method (byte offset 12 in a proc_types entry)
@ and dispatch through call_cache_fn.
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn
1158
@ Disable the ARMv4 MPU and caches, then invalidate both caches and
@ drain the write buffer.
__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0	@ read control register
		bic	r0, r0, #0x000d		@ clear M, C and W bits
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr
1168
@ Disable the ARMv3-style MPU and cache, then invalidate the whole
@ cache with the single v3 invalidate-all operation.
__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0	@ read control register
		bic	r0, r0, #0x000d		@ clear M, C and W bits
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
1176
@ Disable the ARMv4 MMU and caches and invalidate the whole cache and
@ TLB.  Compiles to a plain return when CONFIG_MMU is not set.
__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0	@ read control register
		bic	r0, r0, #0x000d		@ clear M, C and W bits
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr
1187
@ Disable the ARMv7 MMU (when configured) and data cache, invalidate
@ the TLB and branch target cache, then synchronize with DSB + ISB
@ (CP15 c7 forms, usable before barrier instructions are guaranteed).
__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x0005		@ clear SCTLR.C and SCTLR.M
#else
		bic	r0, r0, #0x0004		@ clear SCTLR.C only (no MMU)
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, lr
1204
1205/*
1206 * Clean and flush the cache to maintain consistency.
1207 *
1208 * On entry,
1209 *  r0 = start address
1210 *  r1 = end address (exclusive)
1211 * On exit,
1212 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1213 * This routine must preserve:
1214 *  r4, r6, r7, r8
1215 */
		.align	5
@ Select the cache_flush method (byte offset 16 in a proc_types entry)
@ and dispatch through call_cache_fn.  The end address is stashed in
@ r11 for the by-VA range flush (see __armv7_mmu_cache_flush).
cache_clean_flush:
		mov	r3, #16
		mov	r11, r1			@ preserve end address in r11
		b	call_cache_fn
1221
@ Clean+invalidate the ARMv4 MPU D-cache by index (8 segments of 64
@ entries), then invalidate the I-cache and drain the write buffer.
@ Returns immediately when r4 bit 0 is set (caches known to be off —
@ the EFI entry path sets the LSB when the MMU is disabled).
__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #1			@ r2 != 0: also invalidate I-cache below
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs 	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
1240		
@ FA526: clean+invalidate the entire D-cache, flush the I-cache and
@ drain the write buffer.  Skipped when r4 bit 0 is set (caches off).
__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1249
@ ARMv6: clean+invalidate the D and unified caches (the clean ops are
@ skipped when r4 bit 0 is set, i.e. caches off), always invalidate
@ the I-cache/BTB, then drain the write buffer.
__armv6_mmu_cache_flush:
		mov	r1, #0
		tst	r4, #1			@ caches off? skip the clean ops
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1258
@ ARMv7: clean+invalidate the D-cache, then invalidate I+BTB with DSB/
@ ISB synchronization.  On entry r0 = start address and r11 = end
@ address (exclusive; set up by cache_clean_flush).  If ID_MMFR1
@ advertises the v6-style whole-cache op it is used; otherwise the
@ range is cleaned by VA in D-cache-line-size steps.  When r4 bit 0 is
@ set (caches off) only the I-side invalidation is performed.
__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush			@ caches off: skip the D-side
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		dcache_line_size r1, r2		@ r1 := dcache min line size
		sub	r2, r1, #1		@ r2 := line size mask
		bic	r0, r0, r2		@ round down start to line size
		sub	r11, r11, #1		@ end address is exclusive
		bic	r11, r11, r2		@ round down end to line size
0:		cmp	r0, r11			@ finished?
		bgt	iflush
		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
		add	r0, r0, r1
		b	0b
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
1286
@ ARMv5TEJ: repeat the test-clean-invalidate D-cache operation until
@ the whole cache is clean, then flush the I-cache and drain the write
@ buffer.  Skipped when r4 bit 0 is set (caches off).
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b			@ loop until the cache is clean
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
1295
@ ARMv4: software-flush the D-cache by reading a cache-sized block of
@ memory.  The D-cache size and line length come from the cache type
@ register when one is present (r9 holds the CPU ID set by the caller;
@ equal values mean there is no separate cache type register), else
@ defaults of 32K/32-byte lines are used.  Finishes by flushing the I
@ and D caches and draining the write buffer.  Skipped when r4 bit 0
@ is set (caches off).
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7		@ D-cache size field
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3		@ D-cache line length field
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1329
@ ARMv3 (writethrough): a single invalidate-all covers the whole
@ cache; nothing needs cleaning.  Skipped when r4 bit 0 is set.
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
1337
1338/*
1339 * Various debugging routines for printing hex characters and
1340 * memory, which again must be relocatable.
1341 */
1342#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12			@ scratch for phex: 8 hex digits + NUL
		.size	phexbuf, . - phexbuf
1347
@ phex corrupts {r0, r1, r2, r3}
@ Print r0 as an r1-digit uppercase hex number.  Builds the string
@ backwards (least significant nibble last) in phexbuf, NUL-terminated,
@ then tail-branches to puts with r0 = phexbuf.
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL-terminate at digit count
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts			@ all digits stored: print buffer
		and	r2, r0, #15		@ take low nibble
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ skip ':'..'@' so 10 -> 'A'
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
1362
@ puts corrupts {r0, r1, r2, r3}
@ Print the NUL-terminated string at r0 on the debug UART, sending
@ '\r' after each '\n'.  If entered at label 2 with r0 = 0 (the putc
@ path), exactly one character is printed.
puts:		loadsp	r3, r2, r1		@ r3 := UART base address
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr			@ hit the NUL: done
2:		writeb	r2, r3, r1
		mov	r1, #0x00020000		@ crude fixed delay after each char
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ follow LF with CR
		beq	2b
		teq	r0, #0			@ r0 == 0: single-char (putc) mode
		bne	1b
		mov	pc, lr
@ putc corrupts {r0, r1, r2, r3}
@ Print the single character in r0.  Reuses the puts output path by
@ branching to its label 2 with r0 = 0, which makes puts return after
@ one character.
putc:
		mov	r2, r0
		loadsp	r3, r1, r0
		mov	r0, #0
		b	2b
1384
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
@ Dump the 64 words starting at r0, eight per line, each line formatted
@ as "<address>: <w0> ... <w3>  <w4> ... <w7>" (extra space mid-row).
memdump:	mov	r12, r0			@ r12 = base address
		mov	r10, lr			@ save return address (bl clobbers lr)
		mov	r11, #0			@ r11 = word index
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex			@ print word address
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex			@ print word value
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '		@ extra space after the 4th word
		bleq	putc
		and	r0, r11, #7		@ recompute: putc corrupted r0
		add	r11, r11, #1
		teq	r0, #7
		bne	1b			@ 8 words per line
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64		@ all 64 words dumped?
		blt	2b
		mov	pc, r10
1413#endif
1414
1415		.ltorg
1416
#ifdef CONFIG_ARM_VIRT_EXT
.align 5
@ HYP exception vector table (installed into HVBAR by efi_enter_kernel
@ when entered in HYP mode with the MMU on).  Only the HVC slot (and,
@ with the EFI stub, the hvc-from-HYP slot) lead anywhere; all other
@ exceptions spin in place.
__hyp_reentry_vectors:
		W(b)	.			@ reset
		W(b)	.			@ undef
#ifdef CONFIG_EFI_STUB
		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
#else
		W(b)	.			@ svc
#endif
		W(b)	.			@ pabort
		W(b)	.			@ dabort
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq
		W(b)	.			@ fiq
#endif /* CONFIG_ARM_VIRT_EXT */
1433
@ Jump to the decompressed kernel with the ARM boot-protocol register
@ contents: r0 = 0, r1 = machine/architecture number (from r7),
@ r2 = atags/DTB pointer (from r8).  r4 holds the kernel entry point;
@ M-class parts must enter in Thumb state, so the address LSB is set.
__enter_kernel:
		mov	r0, #0			@ must be 0
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes

reloc_code_end:				@ end of code that must stay relocatable
1443
#ifdef CONFIG_EFI_STUB
@ HVC landing pad (reached via __hyp_reentry_vectors): turn the HYP
@ MMU and caches off (clear HSCTLR.M/C) before entering the kernel.
__enter_kernel_from_hyp:
		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
		bic	r0, r0, #0x5		@ disable MMU and caches
		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
		isb
		b	__enter_kernel

@ efi_enter_kernel(entry, fdt_addr, fdt_size) -- called from the EFI
@ stub with r0 = image base / kernel entry point, r1 = DT pointer,
@ r2 = DT size.  Flushes what we are about to run with the MMU off,
@ sorts out HYP vs SVC entry, then continues decompression at
@ wont_overwrite.  r4 bit 0 is (ab)used to record "MMU was off".
ENTRY(efi_enter_kernel)
		mov	r4, r0			@ preserve image base
		mov	r8, r1			@ preserve DT pointer

		adr_l	r0, call_cache_fn
		adr	r1, 0f			@ clean the region of code we
		bl	cache_clean_flush	@ may run with the MMU off

#ifdef CONFIG_ARM_VIRT_EXT
		@
		@ The EFI spec does not support booting on ARM in HYP mode,
		@ since it mandates that the MMU and caches are on, with all
		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
		@
		@ While the EDK2 reference implementation adheres to this,
		@ U-Boot might decide to enter the EFI stub in HYP mode
		@ anyway, with the MMU and caches either on or off.
		@
		mrs	r0, cpsr		@ get the current mode
		msr	spsr_cxsf, r0		@ record boot mode
		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
		cmp	r0, #HYP_MODE
		bne	.Lefi_svc

		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
		tst	r1, #0x1		@ MMU enabled at HYP?
		beq	1f

		@
		@ When running in HYP mode with the caches on, we're better
		@ off just carrying on using the cached 1:1 mapping that the
		@ firmware provided. Set up the HYP vectors so HVC instructions
		@ issued from HYP mode take us to the correct handler code. We
		@ will disable the MMU before jumping to the kernel proper.
		@
 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
		mcr	p15, 4, r1, c1, c0, 0
		adr	r0, __hyp_reentry_vectors
		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
		isb
		b	.Lefi_hyp

		@
		@ When running in HYP mode with the caches off, we need to drop
		@ into SVC mode now, and let the decompressor set up its cached
		@ 1:1 mapping as usual.
		@
1:		mov	r9, r4			@ preserve image base
		bl	__hyp_stub_install	@ install HYP stub vectors
		safe_svcmode_maskall	r1	@ drop to SVC mode
		msr	spsr_cxsf, r0		@ record boot mode
		orr	r4, r9, #1		@ restore image base and set LSB
		b	.Lefi_hyp
.Lefi_svc:
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		tst	r0, #0x1		@ MMU enabled?
		orreq	r4, r4, #1		@ set LSB if not

.Lefi_hyp:
		mov	r0, r8			@ DT start
		add	r1, r8, r2		@ DT end
		bl	cache_clean_flush

		adr	r0, 0f			@ switch to our stack
		ldr	sp, [r0]		@ 0f holds the PC-relative offset
		add	sp, sp, r0		@ ... to .L_user_stack_end

		mov	r5, #0			@ appended DTB size
		mov	r7, #0xFFFFFFFF		@ machine ID
		b	wont_overwrite
ENDPROC(efi_enter_kernel)
0:		.long	.L_user_stack_end - .
#endif
1527
		.align
		.section ".stack", "aw", %nobits
.L_user_stack:	.space	4096		@ 4 KiB decompressor stack (grows down)
.L_user_stack_end: