1;==================================================================
2; Copyright ARM Ltd 2005. All rights reserved.
3;
4; Cortex-A8 Dhrystone example - Startup Code
5;==================================================================
6
; Module-level assembler declarations for the startup code.
        PRESERVE8                           ; declare that 8-byte stack alignment is preserved
        AREA    CORTEXA8, CODE, READONLY    ; read-only code area named CORTEXA8

        ENTRY                               ; mark the entry point of the image
11
12; Standard definitions of mode bits and interrupt (I & F) flags in PSRs
13
; Processor mode encodings, written as the 5-bit PSR mode field pattern.
Mode_USR        EQU     2_10000             ; 0x10
Mode_FIQ        EQU     2_10001             ; 0x11
Mode_IRQ        EQU     2_10010             ; 0x12
Mode_SVC        EQU     2_10011             ; 0x13
Mode_ABT        EQU     2_10111             ; 0x17
Mode_UNDEF      EQU     2_11011             ; 0x1B
Mode_SYS        EQU     2_11111             ; 0x1F

; Interrupt mask flags in the PSR (a set bit disables that interrupt).
I_Bit           EQU     (0x1 << 7)          ; 0x80 - IRQ disabled when set
F_Bit           EQU     (0x1 << 6)          ; 0x40 - FIQ disabled when set
24
25;==================================================================
26; Disable Cortex-A8 MMU if enabled
27;==================================================================
28
; Start - exported entry label. First action: make sure the MMU is off.
; Clobbers: r0, flags

        EXPORT  Start

Start

        MRC     p15, 0, r0, c1, c0, 0       ; r0 = CP15 c1 Control Register
        TST     r0, #0x1                    ; bit 0 is the MMU enable bit
        BICNE   r0, r0, #0x1                ; only if it was set: clear it...
        MCRNE   p15, 0, r0, c1, c0, 0       ; ...and write the register back
37
38;==================================================================
39; Initialise Supervisor Mode Stack
40; Note stack must be 8 byte aligned.
41;==================================================================
42
; Load SP with the limit (top) address of the STACK region.
; The symbol is generated by the linker from the scatter file.
; NOTE(review): no mode switch is done here, so this sets SP for whatever
; mode the core is in on entry (Supervisor per the section title) - confirm.
        IMPORT  ||Image$$STACK$$ZI$$Limit||
        LDR     SP, =||Image$$STACK$$ZI$$Limit||
45
46;==================================================================
47; TLB maintenance, Invalidate Data and Instruction TLB's
48;==================================================================
49
; Invalidate both the instruction and data TLBs in one CP15 c8 operation.
; Clobbers: r0
        MOV     r0, #0                      ; value written by the operation below
        MCR     p15, 0, r0, c8, c7, 0       ; invalidate I-TLB and D-TLB
52
53;==================================================================
54; Cache Invalidation code for Cortex-A8
55;==================================================================
56
; Invalidate the L1 instruction cache when CLIDR reports one.
; Clobbers: r0, flags

        MRC     p15, 1, r0, c0, c0, 1       ; r0 = CLIDR (Cache Level ID Register)
        TST     r0, #0x3                    ; low two bits of the level 1 cache type non-zero?
        MOV     r0, #0                      ; MOV (no S) leaves the TST result in the flags
        MCRNE   p15, 0, r0, c7, c5, 0       ; if so, invalidate the instruction cache
63
; Clean and invalidate every data/unified cache level by set/way.
;
; Walks the cache levels described by CLIDR. For each level that has a data
; or unified cache, the geometry is read from the Cache Size ID register and
; a clean+invalidate is issued for every (set, way) pair. A data
; synchronisation barrier at "Finished" ensures the maintenance operations
; have completed before execution continues.
;
; Clobbers: r0-r5, r7, r9-r11, flags

        MRC     p15, 1, r0, c0, c0, 1       ; r0 = CLIDR
        ANDS    r3, r0, #0x07000000         ; isolate CLIDR[26:24]; Z set if it is zero
        MOV     r3, r3, LSR #23             ; r3 = number of levels << 1 (flags untouched)
        BEQ     Finished                    ; no levels to maintain

        MOV     r10, #0                     ; r10 = current cache level << 1
Loop1   ADD     r2, r10, r10, LSR #1        ; r2 = level * 3 = bit offset of this level's type field
        MOV     r1, r0, LSR r2              ; shift the type field down to bits [2:0]
        AND     r1, r1, #7                  ; r1 = cache type for this level
        CMP     r1, #2
        BLT     Skip                        ; no cache, or instruction cache only, at this level

        MCR     p15, 2, r10, c0, c0, 0      ; select this level in the Cache Size Selection Register
        MOV     r1, #0
        MCR     p15, 0, r1, c7, c5, 4       ; prefetch flush so the selection takes effect
        MRC     p15, 1, r1, c0, c0, 0       ; r1 = Cache Size ID register for the selected level
        AND     r2, r1, #0x7                ; line length field
        ADD     r2, r2, #4                  ; r2 = log2(line length in bytes) = set field shift
        LDR     r4, =0x3FF
        ANDS    r4, r4, r1, LSR #3          ; r4 = highest way number
        CLZ     r5, r4                      ; r5 = left shift that places the way number at the top
        LDR     r7, =0x00007FFF
        ANDS    r7, r7, r1, LSR #13         ; r7 = highest set number

Loop2   MOV     r9, r4                      ; r9 = way counter, restarted for each set

Loop3   ORR     r11, r10, r9, LSL r5        ; combine level and way number
        ORR     r11, r11, r7, LSL r2        ; add the set number
        MCR     p15, 0, r11, c7, c14, 2     ; clean and invalidate by set/way
        SUBS    r9, r9, #1                  ; next way
        BGE     Loop3
        SUBS    r7, r7, #1                  ; next set
        BGE     Loop2
Skip    ADD     r10, r10, #2                ; advance to the next cache level
        CMP     r3, r10
        BGT     Loop1

Finished
        MOV     r0, #0
        MCR     p15, 0, r0, c7, c10, 4      ; Data Synchronization Barrier: wait for the
                                            ; cache maintenance above to complete
104
105
106;===================================================================
107; Cortex-A8 MMU Configuration
108; Set translation table base
109;===================================================================
110
111
; Program the translation table base registers.
; Out:      r0 = address of the translation table (relied on by the
;                page table generation code that follows)
; Clobbers: r0

        IMPORT  ||Image$$TTB$$ZI$$Base||    ; linker symbol from the scatter file

        ; The Cortex-A8 has two translation table base registers. Writing
        ; zero to the TTB control register (CP15 c2, opcode2 2) selects
        ; TTB register 0 for all translations.
        MOV     r0, #0x0
        MCR     p15, 0, r0, c2, c0, 2

        ; Point TTB register 0 at the table. The low attribute bits are
        ; zero, so bits [4:3] select outer non-cacheable table walks.
        LDR     r0, =||Image$$TTB$$ZI$$Base||
        MCR     p15, 0, r0, c2, c0, 0
126
127
128;===================================================================
129; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables
130;
131; AP[11:10]   - Access Permissions = b11, Read/Write Access
132; Domain[8:5] - Domain = b1111, Domain 15
; Type[1:0]   - Descriptor Type = b10, 1MB section descriptor
134;
135; TEX  C  B
136; 000  0  0  Strongly Ordered
137; 001  1  1  Outer and inner write back, write allocate Normal
138;===================================================================
139
; Build the level 1 translation table.
;
; Pass 1 writes 4096 flat-mapped (virtual == physical) 1MB section
; descriptors: AP = b11 (read/write), domain 15, TEX = 000, C = 0, B = 0
; (Strongly Ordered).
; Pass 2 rewrites the lowest NUM_CACHED_SECTIONS entries with TEX = 001,
; C = 1, B = 1 (outer and inner write back, write allocate), keeping the
; flat mapping.
;
; In:       r0 = translation table base address
; Clobbers: r1, r2, r3, flags

NUM_CACHED_SECTIONS EQU 13                  ; sections 0..12: 0x00000000-0x00CFFFFF

        LDR     r1, =0xfff                  ; r1 = index of the last entry (4096 entries)
        LDR     r2, =2_00000000000000000000110111100010
                                            ; r2 = descriptor bits [19:0] shared by all entries

        ; Pass 1: walk the table from the last entry down to entry 0,
        ; i.e. from table base + 0x3FFC down to table base, one word at a time.

init_ttb_1

        ORR     r3, r2, r1, LSL #20         ; section base address = index * 1MB
        STR     r3, [r0, r1, LSL #2]        ; store at table base + index * 4
        SUBS    r1, r1, #1                  ; previous entry
        BPL     init_ttb_1                  ; stop once entry 0 has been written

        ; Pass 2: build the cacheable variant of the descriptor and rewrite
        ; entries NUM_CACHED_SECTIONS-1 down to 0 with it.

        ORR     r3, r2, #2_0000000001100    ; set C and B (bits [3:2])
        ORR     r3, r3, #2_1000000000000    ; set TEX = 001 (bits [14:12])

        MOV     r1, #(NUM_CACHED_SECTIONS - 1)

init_ttb_2

        ORR     r2, r3, r1, LSL #20         ; cacheable descriptor for section r1
        STR     r2, [r0, r1, LSL #2]        ; overwrite entry r1
        SUBS    r1, r1, #1
        BPL     init_ttb_2
200
201;===================================================================
202; Setup domain control register - Enable all domains to client mode
203;===================================================================
204
; Set all 16 domain fields of the Domain Access Control Register to b01
; (client), so accesses are checked against the descriptor permission bits.
; The register is written outright; its previous value is not needed.
; Clobbers: r0
        LDR     r0, =0x55555555             ; b01 repeated for each of the 16 domains
        MCR     p15, 0, r0, c3, c0, 0       ; write Domain Access Control Register
208
209;===================================================================
210; Setup L2 Cache - L2 Cache Auxiliary Control
211;===================================================================
212
; L2 Cache Auxiliary Control: the register write is currently disabled, so
; the register keeps its existing value. Only r0 is changed by this section.
        MOV     r0, #0                      ; value the disabled write below would store
        ;MCR     p15, 1, r0, c9, c0, 2      ; (disabled) write L2 Auxiliary Control Register
215
216;==================================================================
217; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11.
218; Enables Full Access i.e. in both priv and non priv modes
219;==================================================================
220
; Grant full access (privileged and non-privileged) to coprocessors 10 and
; 11, then set the enable bit in FPEXC.
; The prefetch flush after the access register write makes sure the new
; access rights are in effect before the first CP10/CP11 instruction (the
; FPEXC write) executes.
; Clobbers: r0

        MRC     p15, 0, r0, c1, c0, 2       ; read CP access register
        ORR     r0, r0, #(0xF << 20)        ; bits [21:20] = b11 (CP10), bits [23:22] = b11 (CP11)
        MCR     p15, 0, r0, c1, c0, 2       ; write CP access register back
        MOV     r0, #0
        MCR     p15, 0, r0, c7, c5, 4       ; prefetch flush: synchronise the access change

;==================================================================
; Switch on the VFP and Neon Hardware
;=================================================================

        MOV     r0, #(0x1 << 30)            ; FPEXC value with only the EN bit (bit 30) set
        FMXR    FPEXC, r0                   ; write FPEXC register
233
234;===================================================================
235; Enable MMU and Branch to __main
236;===================================================================
237
; Turn the MMU on and hand over to the C library entry point.
;
; The address of __main is loaded into r12 first so the final branch is a
; register branch that can reach any address.
; Clobbers: r0, r1, r12

        IMPORT  __main
        LDR     r12, =__main                ; r12 = address of __main

        MOV     r1, #0
        MCR     p15, 0, r1, c7, c10, 4      ; Data Synchronization Barrier: translation table
                                            ; writes and TLB/cache maintenance are complete

        MRC     p15, 0, r0, c1, c0, 0       ; read CP15 register 1 into r0
        ORR     r0, r0, #0x1                ; set the MMU enable bit (before scatter loading)
        MCR     p15, 0, r0, c1, c0, 0       ; write CP15 register 1

        MCR     p15, 0, r1, c7, c5, 4       ; prefetch flush: following instructions are
                                            ; fetched with the MMU enabled

; From here on instruction fetches and data accesses are translated.
; The translation table set up by this file maps every 1MB section onto
; itself (virtual address == physical address), so execution continues at
; the same addresses and the branch below does not depend on how many
; instructions were prefetched before the MMU was switched on.

        BX      r12                         ; branch to __main, the C library entry point
257
258        END                         ; mark the end of this file
259
260