/*
 * MIPS DSPr2 optimizations for libjpeg-turbo
 *
 * Copyright (C) 2013, MIPS Technologies, Inc., California.
 * All Rights Reserved.
 * Authors:  Teodora Novkovic (teodora.novkovic@imgtec.com)
 *           Darko Laus       (darko.laus@imgtec.com)
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/*
 * Symbolic names for the general-purpose registers.  These are C
 * preprocessor aliases, so this file must be run through cpp before it
 * reaches the assembler.
 */
#define zero $0
#define AT   $1
#define v0   $2
#define v1   $3
#define a0   $4
#define a1   $5
#define a2   $6
#define a3   $7
#define t0   $8
#define t1   $9
#define t2   $10
#define t3   $11
#define t4   $12
#define t5   $13
#define t6   $14
#define t7   $15
#define s0   $16
#define s1   $17
#define s2   $18
#define s3   $19
#define s4   $20
#define s5   $21
#define s6   $22
#define s7   $23
#define t8   $24
#define t9   $25
#define k0   $26
#define k1   $27
#define gp   $28
#define sp   $29
#define fp   $30
#define s8   $30
#define ra   $31

/*
 * Symbolic names for the floating-point registers.
 */
#define f0   $f0
#define f1   $f1
#define f2   $f2
#define f3   $f3
#define f4   $f4
#define f5   $f5
#define f6   $f6
#define f7   $f7
#define f8   $f8
#define f9   $f9
#define f10  $f10
#define f11  $f11
#define f12  $f12
#define f13  $f13
#define f14  $f14
#define f15  $f15
#define f16  $f16
#define f17  $f17
#define f18  $f18
#define f19  $f19
#define f20  $f20
#define f21  $f21
#define f22  $f22
#define f23  $f23
#define f24  $f24
#define f25  $f25
#define f26  $f26
#define f27  $f27
#define f28  $f28
#define f29  $f29
#define f30  $f30
#define f31  $f31

/*
 * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
 *
 * Exports `symbol` as a 4-byte-aligned global function, opens its .ent
 * region, places the entry label and declares a zero-size frame that
 * returns through ra.  The current assembler option state is saved with
 * ".set push" before arch=mips32r2, noreorder and noat are selected, so
 * every routine opened with this macro has to be closed with END(), which
 * issues the matching ".set pop".
 */
#define LEAF_MIPS32R2(symbol)                           \
                .globl  symbol;                         \
                .align  2;                              \
                .type   symbol, @function;              \
                .ent    symbol, 0;                      \
symbol:         .frame  sp, 0, ra;                      \
                .set    push;                           \
                .set    arch=mips32r2;                  \
                .set    noreorder;                      \
                .set    noat;

/*
 * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
 *
 * Same as LEAF_MIPS32R2, with the DSPr2 instruction set enabled on top.
 */
#define LEAF_MIPS_DSPR2(symbol)                         \
LEAF_MIPS32R2(symbol)                                   \
                .set    dspr2;

/*
 * END - mark end of function
 *
 * Restores the assembler options saved by the LEAF_* macro, closes the
 * .ent region and records the size of the function.
 */
#define END(function)                                   \
                .set    pop;                            \
                .end    function;                       \
                .size   function,.-function

/*
 * Checks if stack offset is big enough for storing/restoring regs_num
 * registers to/from stack.  Stack offset must be greater than or equal
 * to the number of bytes needed for storing registers (regs_num * 4),
 * minus 16.  The MIPS ABI allows usage of the first 16 bytes of the stack
 * frame (this space is reserved for the input arguments of the function,
 * which are already held in a0-a3), so the stack size can be reduced by
 * utilizing this space.
 *
 * Emits an assembly-time error when the offset is too small; generates no
 * code.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
.if \stack_offset < \regs_num * 4 - 16
.error "Stack offset too small."
.endif
.endm

/*
 * Saves set of registers on stack.  Maximum number of registers that
 * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is the number of bytes subtracted from the stack pointer
 * (sp) before registers are pushed in order to provide enough space on
 * stack (offset must be a non-negative multiple of 4, and must be big
 * enough, as described by CHECK_STACK_OFFSET macro).
 *
 * r1 is mandatory and is always stored at 0(sp).  r2-r14 are optional:
 * an omitted argument defaults to 0 and its store is skipped.  Register
 * rN is stored at (N - 1) * 4 bytes above the adjusted sp.
 *
 * Modifies sp (when stack_offset is non-zero) and the stack memory it
 * writes; no other register is changed.
 *
 * This macro is intended to be used in combination with
 * RESTORE_REGS_FROM_STACK macro, called with the same arguments.  Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
                          r2  = 0, r3  = 0, r4  = 0, \
                          r5  = 0, r6  = 0, r7  = 0, \
                          r8  = 0, r9  = 0, r10 = 0, \
                          r11 = 0, r12 = 0, r13 = 0, \
                          r14 = 0
    /* Reject negative offsets and offsets that are not word multiples. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
    .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    .if \stack_offset != 0
    addiu           sp, sp, -\stack_offset
    .endif
    /*
     * The first four slots never need a size check: for up to four
     * registers CHECK_STACK_OFFSET would compare against a bound of zero
     * or less, which any accepted offset satisfies.
     */
    sw              \r1, 0(sp)
    .if \r2 != 0
    sw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    sw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    sw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    sw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    sw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    sw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    sw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    sw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    sw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    sw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    sw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    sw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    sw              \r14, 52(sp)
    .endif
.endm

/*
 * Restores set of registers from stack.  Maximum number of registers that
 * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is the number of bytes that are added to the stack pointer
 * (sp) after registers are restored (offset must be a non-negative
 * multiple of 4, and must be big enough, as described by
 * CHECK_STACK_OFFSET macro).
 *
 * r1 is mandatory and is always loaded from 0(sp).  r2-r14 are optional:
 * an omitted argument defaults to 0 and its load is skipped.  Register
 * rN is loaded from (N - 1) * 4 bytes above sp, i.e. from the slot that
 * SAVE_REGS_ON_STACK uses for the same argument position.
 *
 * Modifies the listed registers and sp (when stack_offset is non-zero).
 *
 * This macro is intended to be used in combination with
 * SAVE_REGS_ON_STACK macro, called with the same arguments.  Example:
 *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
 *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
                               r2  = 0, r3  = 0, r4  = 0, \
                               r5  = 0, r6  = 0, r7  = 0, \
                               r8  = 0, r9  = 0, r10 = 0, \
                               r11 = 0, r12 = 0, r13 = 0, \
                               r14 = 0
    /* Reject negative offsets and offsets that are not word multiples. */
    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
    .error "Stack offset must be non-negative and a multiple of 4."
    .endif
    lw              \r1, 0(sp)
    .if \r2 != 0
    lw              \r2, 4(sp)
    .endif
    .if \r3 != 0
    lw              \r3, 8(sp)
    .endif
    .if \r4 != 0
    lw              \r4, 12(sp)
    .endif
    .if \r5 != 0
    CHECK_STACK_OFFSET 5, \stack_offset
    lw              \r5, 16(sp)
    .endif
    .if \r6 != 0
    CHECK_STACK_OFFSET 6, \stack_offset
    lw              \r6, 20(sp)
    .endif
    .if \r7 != 0
    CHECK_STACK_OFFSET 7, \stack_offset
    lw              \r7, 24(sp)
    .endif
    .if \r8 != 0
    CHECK_STACK_OFFSET 8, \stack_offset
    lw              \r8, 28(sp)
    .endif
    .if \r9 != 0
    CHECK_STACK_OFFSET 9, \stack_offset
    lw              \r9, 32(sp)
    .endif
    .if \r10 != 0
    CHECK_STACK_OFFSET 10, \stack_offset
    lw              \r10, 36(sp)
    .endif
    .if \r11 != 0
    CHECK_STACK_OFFSET 11, \stack_offset
    lw              \r11, 40(sp)
    .endif
    .if \r12 != 0
    CHECK_STACK_OFFSET 12, \stack_offset
    lw              \r12, 44(sp)
    .endif
    .if \r13 != 0
    CHECK_STACK_OFFSET 13, \stack_offset
    lw              \r13, 48(sp)
    .endif
    .if \r14 != 0
    CHECK_STACK_OFFSET 14, \stack_offset
    lw              \r14, 52(sp)
    .endif
    /* Release the space only after every slot has been read back. */
    .if \stack_offset != 0
    addiu           sp, sp, \stack_offset
    .endif
.endm