// Part of the Raspberry-Pi Bare Metal Tutorials // Copyright (c) 2013-2015, Brian Sidebotham // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
// Relocate to just below 32MB

#include "defs.h"

// -----------------------------------------------------------------------
// Stack layout
//
// Every Cx_*_STACK value is a byte offset that the start-up code SUBTRACTS
// from the address of _start to obtain the initial stack pointer for one
// processor mode (see the "sub sp, r4, #..." instructions below).
// C0_* values are used by _reset_, C1_* values by _init_core.
// -----------------------------------------------------------------------

.equ    STACK_SIZE,                     0x00100000

.equ    C0_SVR_STACK,                   0
.equ    C0_IRQ_STACK,                   (STACK_SIZE*1)
.equ    C0_FIQ_STACK,                   STACK_SIZE*2
.equ    C0_USER_STACK,                  STACK_SIZE*4
.equ    C0_ABORT_STACK,                 STACK_SIZE*5
.equ    C0_UNDEFINED_STACK,             STACK_SIZE*6

#if defined(RPI2) || defined(RPI3)
.equ    C1_SVR_STACK,                   STACK_SIZE*7
.equ    C1_IRQ_STACK,                   STACK_SIZE*8
.equ    C1_FIQ_STACK,                   STACK_SIZE*9
.equ    C1_USER_STACK,                  STACK_SIZE*10
.equ    C1_ABORT_STACK,                 STACK_SIZE*11
.equ    C1_UNDEFINED_STACK,             STACK_SIZE*12
#endif

// Bits OR-ed into the System Control Register by _enable_l1_cache
.equ    SCTLR_ENABLE_DATA_CACHE,        0x4
.equ    SCTLR_ENABLE_BRANCH_PREDICTION, 0x800
.equ    SCTLR_ENABLE_INSTRUCTION_CACHE, 0x1000

.section ".text.startup"

.global _start
.global _get_cpsr
.global _get_stack_pointer
.global _exception_table
.global _enable_interrupts
.global _disable_interrupts
.global _enable_unaligned_access
.global _enable_l1_cache
.global _invalidate_icache
.global _invalidate_dcache
.global _clean_invalidate_dcache
.global _invalidate_dcache_mva
.global _clean_invalidate_dcache_mva
.global _invalidate_dtlb
.global _invalidate_dtlb_mva
.global _data_memory_barrier

#ifdef HAS_MULTICORE
.global _get_core
.global _init_core
.global _spin_core
#endif

#if defined(HAS_40PINS)
.global _toggle_test_pin
#endif

// From the ARM ARM (Architecture Reference Manual). Make sure you get the
// ARMv5 documentation which includes the ARMv6 documentation which is the
// correct processor type for the Broadcom BCM2835. The ARMv6-M manuals
// available on the ARM website are for Cortex-M parts only and are very
// different.
//
// See ARM section A2.2 (Processor Modes)

.equ    CPSR_MODE_USER,         0x10
.equ    CPSR_MODE_FIQ,          0x11
.equ    CPSR_MODE_IRQ,          0x12
.equ    CPSR_MODE_SVR,          0x13
.equ    CPSR_MODE_ABORT,        0x17
.equ    CPSR_MODE_HYP,          0x1A
.equ    CPSR_MODE_UNDEFINED,    0x1B
.equ    CPSR_MODE_SYSTEM,       0x1F
.equ    CPSR_MODE_MASK,         0x1F

// See ARM section A2.5 (Program status registers)
.equ    CPSR_A_BIT,             0x100
.equ    CPSR_IRQ_INHIBIT,       0x80
.equ    CPSR_FIQ_INHIBIT,       0x40
.equ    CPSR_THUMB,             0x20

// -----------------------------------------------------------------------
// Exception vector table
//
// Eight "ldr pc, <slot>" instructions followed immediately by the eight
// address slots they load from: 16 words in total. _reset_ copies exactly
// these 16 words to address 0, so the instructions and their slots must
// stay contiguous and in this order (the loads are pc-relative).
// -----------------------------------------------------------------------
_start:
    ldr     pc, _reset_h
    ldr     pc, _undefined_instruction_vector_h
    ldr     pc, _software_interrupt_vector_h
    ldr     pc, _prefetch_abort_vector_h
    ldr     pc, _data_abort_vector_h
    ldr     pc, _unused_handler_h
    ldr     pc, _interrupt_vector_h
    ldr     pc, _fast_interrupt_vector_h

_reset_h:                           .word   _reset_
_undefined_instruction_vector_h:    .word   _undefined_instruction_handler_
_software_interrupt_vector_h:       .word   _swi_handler_
_prefetch_abort_vector_h:           .word   _prefetch_abort_handler_
_data_abort_vector_h:               .word   _data_abort_handler_
_unused_handler_h:                  .word   _reset_
_interrupt_vector_h:                .word   arm_irq_handler
_fast_interrupt_vector_h:           .word   arm_fiq_handler

// -----------------------------------------------------------------------
// _reset_ : reset entry point (reached through the first vector)
//
// Enables the L1 caches, parks secondary cores or leaves HYP mode
// (depending on the build flags), copies the vector table to address 0,
// gives every processor mode a stack, enables the VFP and finally
// branches to _cstartup. Never returns.
// -----------------------------------------------------------------------
.section ".text._reset_"

_reset_:
    // No stack exists yet; _enable_l1_cache only uses r0 and lr
    bl      _enable_l1_cache

#ifdef HAS_MULTICORE

#ifdef KERNEL_OLD
    // if kernel_old=1 all cores are running and we need to sleep 1-3
    // if kernel_old=0 then just core0 is running, and core 1-3 are waiting
    // on a mailbox write to be woken up.
    //
    // Test which core we are running on
    mrc     p15, 0, r0, c0, c0, 5
    ands    r0, #3                  // Z set => core 0
    beq     _core_continue
    // Put cores 1-3 into a tight loop
_core_loop:
    wfi
    b       _core_loop
_core_continue:
#else
    // if kernel_old=0 enter in HYP mode and need to force a switch to SVC mode
    //
    // For now we assume kernel_old=1, in which case this code is not built.
    //
    // The logs show:
    // SVC mode: cpsr ends with 1d3
    // HYP mode: cpsr ends with 1a3
    mrs     r0, cpsr
    eor     r0, r0, #CPSR_MODE_HYP
    tst     r0, #CPSR_MODE_MASK     // Z set => the mode bits were HYP
    // bic/orr do not update the flags, so the tst result survives to the bne
    bic     r0 , r0 , #CPSR_MODE_MASK
    orr     r0 , r0 , #CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT | CPSR_MODE_SVR
    bne     _not_in_hyp_mode
    // In HYP mode: drop to SVC by performing an exception return
    orr     r0, r0, #CPSR_A_BIT
    adr     lr, _reset_continue
    msr     spsr_cxsf, r0
    .word   0xE12EF30E              // msr_elr_hyp lr
    .word   0xE160006E              // eret
_not_in_hyp_mode:
    msr     cpsr_c, r0
_reset_continue:
#endif

#endif

    // We enter execution in supervisor mode. For more information on
    // processor modes see ARM Section A2.2 (Processor Modes)

    // Copy the 16-word vector table (8 instructions + 8 address slots)
    // from _start to address 0, eight words at a time
    ldr     r0,=_start
    mov     r1, #0x00000000
    ldmia   r0!,{r2, r3, r4, r5, r6, r7, r8, r9}
    stmia   r1!,{r2, r3, r4, r5, r6, r7, r8, r9}
    ldmia   r0!,{r2, r3, r4, r5, r6, r7, r8, r9}
    stmia   r1!,{r2, r3, r4, r5, r6, r7, r8, r9}

    // Initialise Stack Pointers ---------------------------------------------
    // r4 = address of _start; every stack is placed at an offset below it.
    // (r4 is not banked in any of the modes visited here, so it keeps its
    // value across the mode switches.)
    ldr     r4,=_start

    // We're going to use interrupt mode, so setup the interrupt mode
    // stack pointer which differs to the application stack pointer:
    mov     r0, #(CPSR_MODE_IRQ | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_IRQ_STACK

    // Also setup the stack used for FIQs
    mov     r0, #(CPSR_MODE_FIQ | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_FIQ_STACK

    // Also setup the stack used for undefined exceptions
    mov     r0, #(CPSR_MODE_UNDEFINED | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_UNDEFINED_STACK

    // Also setup the stack used for prefetch and data abort exceptions
    mov     r0, #(CPSR_MODE_ABORT | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_ABORT_STACK

    // Finally, a user/system mode stack, although the application will
    // likely reset this
    mov     r0, #(CPSR_MODE_SYSTEM | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_USER_STACK

    // Switch back to supervisor mode (our application mode) and
    // set the stack pointer. Remember that the stack works its way
    // down memory, our heap will work its way up from after the
    // application.
    mov     r0, #(CPSR_MODE_SVR | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C0_SVR_STACK

    // Enable VFP ------------------------------------------------------------

#ifdef HAS_MULTICORE

    // 1. Set the CPACR for access to CP10 and CP11, and clear the ASEDIS
    //    and D32DIS bits:
    ldr     r0, =(0xf << 20)
    mcr     p15, 0, r0, c1, c0, 2

    // 2. Set the FPEXC EN bit to enable the NEON MPE:
    mov     r0, #0x40000000
    vmsr    fpexc, r0

#else

    // r1 = Access Control Register
    mrc     p15, #0, r1, c1, c0, #2
    // enable full access for p10,11
    orr     r1, r1, #(0xf << 20)
    // Access Control Register = r1
    mcr     p15, #0, r1, c1, c0, #2
    mov     r1, #0
    // flush prefetch buffer because of FMXR below
    mcr     p15, #0, r1, c7, c5, #4
    // and CP 10 & 11 were only just enabled
    // Enable VFP itself
    mov     r0,#0x40000000
    // FPEXC = r0
    fmxr    fpexc, r0

#endif

    // The c-startup function which we never return from. This function will
    // initialise the ro data section (most things that have the const
    // declaration) and initialise the bss section variables to 0 (generally
    // known as automatics). It'll then call main
    b       _cstartup

// -----------------------------------------------------------------------
// arm_irq_handler / arm_fiq_handler : shared IRQ and FIQ entry stub
//
// Adjusts lr to the return address, saves r0-r12 and lr on the current
// mode's stack, calls InterruptHandler, then restores the registers and
// returns from the exception (the "^" form of ldmfd also restores CPSR
// from SPSR).
// -----------------------------------------------------------------------
arm_fiq_handler:
arm_irq_handler:
    //subs    pc, lr, #4
    sub     lr, lr, #4              /* lr: return address */
    stmfd   sp!, {r0-r12, lr}       /* save r0-r12 and return address */
    bl      InterruptHandler
    ldmfd   sp!, {r0-r12, pc}^      /* restore registers and return */

// -----------------------------------------------------------------------
// _get_stack_pointer
// Out: r0 = current stack pointer
// -----------------------------------------------------------------------
.section ".text._get_stack_pointer"

_get_stack_pointer:
    mov     r0, sp
    mov     pc, lr

// -----------------------------------------------------------------------
// _get_cpsr
// Out: r0 = current program status register
// -----------------------------------------------------------------------
.section ".text._get_cpsr"

_get_cpsr:
    mrs     r0, cpsr
    mov     pc, lr

// -----------------------------------------------------------------------
// _enable_interrupts : clear the IRQ and FIQ inhibit bits in the CPSR
// Out:      r0 = the CPSR value that was written
// Clobbers: r0
// -----------------------------------------------------------------------
.section ".text._enable_interrupts"

_enable_interrupts:
    mrs     r0, cpsr
    bic     r0, r0, #CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT
    msr     cpsr_c, r0
    mov     pc, lr

// -----------------------------------------------------------------------
// _disable_interrupts : set the IRQ and FIQ inhibit bits in the CPSR
// Out:      r0 = the CPSR as it was BEFORE the bits were set (the new
//                value is built in r1 so that r0 is left untouched)
// Clobbers: r1
// -----------------------------------------------------------------------
.section ".text._disable_interrupts"

_disable_interrupts:
    mrs     r0, cpsr
    orr     r1, r0, #CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT
    msr     cpsr_c, r1
    mov     pc, lr

// -----------------------------------------------------------------------
// Exception stubs (undefined instruction, prefetch abort, data abort, SWI)
//
// Each stub pushes r0-r12 and lr, then the SPSR, and calls the matching
// C handler with r0 pointing at the saved frame:
//
//     [r0 + 0]        spsr
//     [r0 + 4 .. 52]  r0 .. r12
//     [r0 + 56]       lr (exception return address, unadjusted)
//
// There is no return-from-exception sequence here, so the C handler is
// expected not to return. Should it return anyway, the stub parks the
// core in a tight loop instead of running off the end of its section
// into whatever the linker happened to place next.
// -----------------------------------------------------------------------
.section ".text._undefined_instruction_handler_"

_undefined_instruction_handler_:
    stmfd   sp!, {r0-r12, lr}
    mrs     r0, spsr                // Get spsr.
    stmfd   sp!, {r0}               // Store spsr onto stack.
    mov     r0, sp
    bl      undefined_instruction_handler
1:  b       1b                      // handler returned: hang here

.section ".text._prefetch_abort_handler_"

_prefetch_abort_handler_:
    stmfd   sp!, {r0-r12, lr}
    mrs     r0, spsr                // Get spsr.
    stmfd   sp!, {r0}               // Store spsr onto stack.
    mov     r0, sp
    bl      prefetch_abort_handler
1:  b       1b                      // handler returned: hang here

.section ".text._data_abort_handler_"

_data_abort_handler_:
    stmfd   sp!, {r0-r12, lr}
    mrs     r0, spsr                // Get spsr.
    stmfd   sp!, {r0}               // Store spsr onto stack.
    mov     r0, sp
    bl      data_abort_handler
1:  b       1b                      // handler returned: hang here

.section ".text._swi_handler_"

_swi_handler_:
    stmfd   sp!, {r0-r12, lr}
    mrs     r0, spsr                // Get spsr.
    stmfd   sp!, {r0}               // Store spsr onto stack.
    mov     r0, sp
    bl      swi_handler
1:  b       1b                      // handler returned: hang here

// -----------------------------------------------------------------------
// _enable_unaligned_access : clear SCTLR bit 1 (A), set SCTLR bit 22 (U)
// Clobbers: r0
// -----------------------------------------------------------------------
.section ".text._enable_unaligned_access"

_enable_unaligned_access:
    mrc     p15, 0, r0, c1, c0, 0   // read SCTLR
    bic     r0, r0, #2              // A (no unaligned access fault)
    orr     r0, r0, #1 << 22        // U (v6 unaligned access model)
    mcr     p15, 0, r0, c1, c0, 0   // write SCTLR
    mov     pc, lr

// Enable L1 Cache -------------------------------------------------------
//
// _enable_l1_cache : set the branch prediction, data cache and instruction
// cache enable bits in the System Control Register.
// Clobbers: r0. Uses no stack, so it may be called before any stack
// pointer has been set up (as _reset_ does).
.section ".text._enable_l1_cache"

_enable_l1_cache:
    // R0 = System Control Register
    mrc     p15,0,r0,c1,c0,0

    // Enable caches and branch prediction
    orr     r0,#SCTLR_ENABLE_BRANCH_PREDICTION
    orr     r0,#SCTLR_ENABLE_DATA_CACHE
    orr     r0,#SCTLR_ENABLE_INSTRUCTION_CACHE

    // System Control Register = R0
    mcr     p15,0,r0,c1,c0,0

    mov     pc, lr

// -----------------------------------------------------------------------
// Cache and TLB maintenance helpers
//
// The "whole cache / whole TLB" variants write 0 to the CP15 operation
// register and therefore clobber r0. The "_mva" variants pass the caller's
// r0 straight to the CP15 operation and leave it unchanged.
// -----------------------------------------------------------------------
.section ".text._invalidate_icache"

_invalidate_icache:
    mov     r0, #0
    mcr     p15, 0, r0, c7, c5, 0
    mov     pc, lr

.section ".text._invalidate_dcache"

_invalidate_dcache:
    mov     r0, #0
    mcr     p15, 0, r0, c7, c6, 0
    mov     pc, lr

.section ".text._clean_invalidate_dcache"

_clean_invalidate_dcache:
    mov     r0, #0
    mcr     p15, 0, r0, c7, c14, 0
    mov     pc, lr

.section ".text._invalidate_dcache_mva"

// In: r0 = value written to CP15 c7, c6, 1
_invalidate_dcache_mva:
    mcr     p15, 0, r0, c7, c6, 1
    mov     pc, lr

.section ".text._clean_invalidate_dcache_mva"

// In: r0 = value written to CP15 c7, c14, 1
_clean_invalidate_dcache_mva:
    mcr     p15, 0, r0, c7, c14, 1
    mov     pc, lr

.section ".text._invalidate_dtlb"

_invalidate_dtlb:
    mov     r0, #0
    mcr     p15, 0, r0, c8, c6, 0
    mov     pc, lr

.section ".text._invalidate_dtlb_mva"

// In: r0 = value written to CP15 c8, c6, 1
_invalidate_dtlb_mva:
    mcr     p15, 0, r0, c8, c6, 1
    mov     pc, lr

// -----------------------------------------------------------------------
// _data_memory_barrier
// RPI2/RPI3 builds use the dmb instruction; other builds use the CP15
// c7, c10, 5 operation, which clobbers r0.
// -----------------------------------------------------------------------
.section ".text._data_memory_barrier"

_data_memory_barrier:
#if defined(RPI2) || defined(RPI3)
    dmb
#else
    mov     r0, #0
    mcr     p15, 0, r0, c7, c10, 5
#endif
    mov     pc, lr

#ifdef USE_MULTICORE
// -----------------------------------------------------------------------
// _init_core : entry point for a secondary core
//
// Leaves HYP mode if necessary, sets up the C1_* stacks for every
// processor mode, enables the VFP and calls run_core.
//
// NOTE: this routine is guarded by USE_MULTICORE while its .global
// declaration and _spin_core are guarded by HAS_MULTICORE. If run_core
// returns, execution falls through into whatever follows; that is
// _spin_core only when HAS_MULTICORE is also defined.
// -----------------------------------------------------------------------
_init_core:
    // On a Raspberry Pi 2 we enter in HYP mode, and need to force a switch to supervisor mode
    mrs     r0, cpsr
    eor     r0, r0, #CPSR_MODE_HYP
    tst     r0, #CPSR_MODE_MASK     // Z set => the mode bits were HYP
    // bic/orr do not update the flags, so the tst result survives to the bne
    bic     r0 , r0 , #CPSR_MODE_MASK
    orr     r0 , r0 , #CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT | CPSR_MODE_SVR
    bne     _init_not_in_hyp_mode
    // In HYP mode: drop to SVC by performing an exception return
    orr     r0, r0, #CPSR_A_BIT
    adr     lr, _init_continue
    msr     spsr_cxsf, r0
    .word   0xE12EF30E              // msr_elr_hyp lr
    .word   0xE160006E              // eret
_init_not_in_hyp_mode:
    msr     cpsr_c, r0

_init_continue:
    // r4 = address of _start; every stack is placed at an offset below it
    ldr     r4,=_start

    // Initialise Stack Pointers ---------------------------------------------

    // We're going to use interrupt mode, so setup the interrupt mode
    // stack pointer which differs to the application stack pointer:
    mov     r0, #(CPSR_MODE_IRQ | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_IRQ_STACK

    // Also setup the stack used for FIQs
    mov     r0, #(CPSR_MODE_FIQ | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_FIQ_STACK

    // Also setup the stack used for undefined exceptions
    mov     r0, #(CPSR_MODE_UNDEFINED | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_UNDEFINED_STACK

    // Also setup the stack used for prefetch and data abort exceptions
    mov     r0, #(CPSR_MODE_ABORT | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_ABORT_STACK

    // Finally, a user/system mode stack, although the application will likely reset this
    mov     r0, #(CPSR_MODE_SYSTEM | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_USER_STACK

    // Switch back to supervisor mode (our application mode) and
    // set the stack pointer. Remember that the stack works its way
    // down memory, our heap will work its way up from after the
    // application.
    mov     r0, #(CPSR_MODE_SVR | CPSR_IRQ_INHIBIT | CPSR_FIQ_INHIBIT )
    msr     cpsr_c, r0
    sub     sp, r4, #C1_SVR_STACK

    // Enable VFP ------------------------------------------------------------

    // 1. Set the CPACR for access to CP10 and CP11, and clear the ASEDIS
    //    and D32DIS bits:
    ldr     r0, =(0xf << 20)
    mcr     p15, 0, r0, c1, c0, 2

    // 2. Set the FPEXC EN bit to enable the NEON MPE:
    mov     r0, #0x40000000
    vmsr    fpexc, r0

    bl      run_core
#endif

#ifdef HAS_MULTICORE
// -----------------------------------------------------------------------
// _spin_core : park the calling core forever
//
// If main does return for some reason, just catch it and stay here.
// DEBUG builds first write "SPIN", the core number as a digit, and CR LF
// through RPI_AuxMiniUartWrite. Never returns.
// -----------------------------------------------------------------------
_spin_core:
#ifdef DEBUG
    mov     r0, #'S'
    bl      RPI_AuxMiniUartWrite
    mov     r0, #'P'
    bl      RPI_AuxMiniUartWrite
    mov     r0, #'I'
    bl      RPI_AuxMiniUartWrite
    mov     r0, #'N'
    bl      RPI_AuxMiniUartWrite
    bl      _get_core
    add     r0, r0, #'0'            // core number -> ASCII digit
    bl      RPI_AuxMiniUartWrite
    mov     r0, #'\r'
    bl      RPI_AuxMiniUartWrite
    mov     r0, #'\n'
    bl      RPI_AuxMiniUartWrite
#endif
_spin_core1:
    wfi
    b       _spin_core1

// -----------------------------------------------------------------------
// _get_core
// Out: r0 = low two bits of CP15 c0, c0, 5 (the number of the core this
//           code is running on, 0-3)
// -----------------------------------------------------------------------
_get_core:
    mrc     p15, 0, r0, c0, c0, 5
    and     r0, #3
    mov     pc, lr
#endif

#ifdef HAS_40PINS
// -----------------------------------------------------------------------
// _toggle_test_pin : pulse the test pin(s) r0 times
//
// In:       r0 = number of pulses. Each pulse writes TEST_MASK to GPSET0
//                and then to GPCLR0.
// Clobbers: r0, r1, r2, flags
//
// NOTE: the count is decremented before it is tested, so r0 = 0 does not
// mean "no pulses"; it wraps and produces 2^32 pulses.
// TEST_MASK, GPSET0 and GPCLR0 are not defined in this file (presumably
// they come from defs.h).
// -----------------------------------------------------------------------
.section ".text._toggle_test_pin"
_toggle_test_pin:
    mov     r1, #TEST_MASK
_toggle_test_pin_loop:
    ldr     r2, =GPSET0
    str     r1, [r2]
    ldr     r2, =GPCLR0
    str     r1, [r2]
    subs    r0, r0, #1
    bne     _toggle_test_pin_loop
    mov     pc, lr
#endif