;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_short_walsh4x4_armv6|

    ARM
    REQUIRE8
    PRESERVE8

    AREA    |.text|, CODE, READONLY  ; name this block of code

;void vp8_short_walsh4x4_armv6(short *input, short *output, int pitch)
; r0    short *input,
; r1    short *output,
; r2    int pitch
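;
; For orientation, an illustrative C sketch of what this routine computes:
; a 4x4 Walsh-Hadamard transform with the horizontal and vertical butterfly
; passes fused. Variable names (a1..d2) follow the register comments below;
; walsh4x4_sketch and the temporary t[] are hypothetical names used only for
; this sketch, not the generic C implementation.
;
;   void walsh4x4_sketch(short *input, short *output, int pitch)
;   {
;       int i, a1, b1, c1, d1, a2, b2, c2, d2;
;       int t[16];
;       short *ip = input;
;
;       for (i = 0; i < 4; i++, ip += pitch / 2)  /* pitch is in bytes */
;       {
;           a1 = (ip[0] + ip[2]) << 2;
;           d1 = (ip[1] + ip[3]) << 2;
;           c1 = (ip[1] - ip[3]) << 2;
;           b1 = (ip[0] - ip[2]) << 2;
;           t[i * 4 + 0] = a1 + d1 + (a1 != 0);   /* the addne fix-up below */
;           t[i * 4 + 1] = b1 + c1;
;           t[i * 4 + 2] = b1 - c1;
;           t[i * 4 + 3] = a1 - d1;
;       }
;
;       for (i = 0; i < 4; i++)                   /* vertical pass, column i */
;       {
;           a1 = t[i]     + t[8 + i];
;           d1 = t[4 + i] + t[12 + i];
;           c1 = t[4 + i] - t[12 + i];
;           b1 = t[i]     - t[8 + i];
;
;           a2 = a1 + d1;  b2 = b1 + c1;
;           c2 = b1 - c1;  d2 = a1 - d1;
;
;           a2 += a2 < 0;  b2 += b2 < 0;          /* the addmi fix-ups below */
;           c2 += c2 < 0;  d2 += d2 < 0;
;
;           output[i]      = (a2 + 3) >> 3;       /* round, then >> 3 */
;           output[4 + i]  = (b2 + 3) >> 3;
;           output[8 + i]  = (c2 + 3) >> 3;
;           output[12 + i] = (d2 + 3) >> 3;
;       }
;   }
;
; The assembly below interleaves these passes: each smuad/smusd on a packed
; [d1|a1] or [c1|b1] pair with the constant [4|4] produces one t[] entry of
; a row, and the results are combined column by column.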
|vp8_short_walsh4x4_armv6| PROC

    stmdb       sp!, {r4 - r11, lr}

    ldrd        r4, r5, [r0], r2    ; [i1|i0] [i3|i2], advance by pitch bytes
    ldr         lr, c00040004       ; packed multiplier [4|4]
    ldrd        r6, r7, [r0], r2    ; [i5|i4] [i7|i6]

    ; 0-3
    qadd16      r3, r4, r5          ; [d1|a1] [1+3   |   0+2]
    qsub16      r4, r4, r5          ; [c1|b1] [1-3   |   0-2]

    ldrd        r8, r9, [r0], r2    ; [i9|i8] [i11|i10]
    ; 4-7
    qadd16      r5, r6, r7          ; [d1|a1] [5+7   |   4+6]
    qsub16      r6, r6, r7          ; [c1|b1] [5-7   |   4-6]

    ldrd        r10, r11, [r0]      ; [i13|i12] [i15|i14]
    ; 8-11
    qadd16      r7, r8, r9          ; [d1|a1] [9+11  |  8+10]
    qsub16      r8, r8, r9          ; [c1|b1] [9-11  |  8-10]

    ; 12-15
    qadd16      r9, r10, r11        ; [d1|a1] [13+15 | 12+14]
    qsub16      r10, r10, r11       ; [c1|b1] [13-15 | 12-14]


    lsls        r2, r3, #16
    smuad       r11, r3, lr         ; A0 = a1<<2 + d1<<2
    addne       r11, r11, #1        ; A0 += (a1!=0)

    lsls        r2, r7, #16
    smuad       r12, r7, lr         ; C0 = a1<<2 + d1<<2
    addne       r12, r12, #1        ; C0 += (a1!=0)

    add         r0, r11, r12        ; a1_0 = A0 + C0
    sub         r11, r11, r12       ; b1_0 = A0 - C0

    lsls        r2, r5, #16
    smuad       r12, r5, lr         ; B0 = a1<<2 + d1<<2
    addne       r12, r12, #1        ; B0 += (a1!=0)

    lsls        r2, r9, #16
    smuad       r2, r9, lr          ; D0 = a1<<2 + d1<<2
    addne       r2, r2, #1          ; D0 += (a1!=0)

    add         lr, r12, r2         ; d1_0 = B0 + D0
    sub         r12, r12, r2        ; c1_0 = B0 - D0

    ; op[0,4,8,12]
    adds        r2, r0, lr          ; a2 = a1_0 + d1_0
    addmi       r2, r2, #1          ; += a2 < 0
    add         r2, r2, #3          ; += 3
    subs        r0, r0, lr          ; d2 = a1_0 - d1_0
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1]            ; op[0]

    addmi       r0, r0, #1          ; += d2 < 0
    add         r0, r0, #3          ; += 3
    ldr         lr, c00040004       ; reload [4|4] (lr held d1_0)
    mov         r0, r0, asr #3      ; >> 3
    strh        r0, [r1, #24]       ; op[12]

    adds        r2, r11, r12        ; b2 = b1_0 + c1_0
    addmi       r2, r2, #1          ; += b2 < 0
    add         r2, r2, #3          ; += 3
    subs        r0, r11, r12        ; c2 = b1_0 - c1_0
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #8]        ; op[4]

    addmi       r0, r0, #1          ; += c2 < 0
    add         r0, r0, #3          ; += 3
    smusd       r3, r3, lr          ; A3 = a1<<2 - d1<<2
    smusd       r7, r7, lr          ; C3 = a1<<2 - d1<<2
    mov         r0, r0, asr #3      ; >> 3
    strh        r0, [r1, #16]       ; op[8]


    ; op[3,7,11,15]
    add         r0, r3, r7          ; a1_3 = A3 + C3
    sub         r3, r3, r7          ; b1_3 = A3 - C3

    smusd       r5, r5, lr          ; B3 = a1<<2 - d1<<2
    smusd       r9, r9, lr          ; D3 = a1<<2 - d1<<2
    add         r7, r5, r9          ; d1_3 = B3 + D3
    sub         r5, r5, r9          ; c1_3 = B3 - D3

    adds        r2, r0, r7          ; a2 = a1_3 + d1_3
    addmi       r2, r2, #1          ; += a2 < 0
    add         r2, r2, #3          ; += 3
    adds        r9, r3, r5          ; b2 = b1_3 + c1_3
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #6]        ; op[3]

    addmi       r9, r9, #1          ; += b2 < 0
    add         r9, r9, #3          ; += 3
    subs        r2, r3, r5          ; c2 = b1_3 - c1_3
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #14]       ; op[7]

    addmi       r2, r2, #1          ; += c2 < 0
    add         r2, r2, #3          ; += 3
    subs        r9, r0, r7          ; d2 = a1_3 - d1_3
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #22]       ; op[11]

    addmi       r9, r9, #1          ; += d2 < 0
    add         r9, r9, #3          ; += 3
    smuad       r3, r4, lr          ; A1 = b1<<2 + c1<<2
    smuad       r5, r8, lr          ; C1 = b1<<2 + c1<<2
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #30]       ; op[15]

    ; op[1,5,9,13]
    add         r0, r3, r5          ; a1_1 = A1 + C1
    sub         r3, r3, r5          ; b1_1 = A1 - C1

    smuad       r7, r6, lr          ; B1 = b1<<2 + c1<<2
    smuad       r9, r10, lr         ; D1 = b1<<2 + c1<<2
    add         r5, r7, r9          ; d1_1 = B1 + D1
    sub         r7, r7, r9          ; c1_1 = B1 - D1

    adds        r2, r0, r5          ; a2 = a1_1 + d1_1
    addmi       r2, r2, #1          ; += a2 < 0
    add         r2, r2, #3          ; += 3
    adds        r9, r3, r7          ; b2 = b1_1 + c1_1
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #2]        ; op[1]

    addmi       r9, r9, #1          ; += b2 < 0
    add         r9, r9, #3          ; += 3
    subs        r2, r3, r7          ; c2 = b1_1 - c1_1
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #10]       ; op[5]

    addmi       r2, r2, #1          ; += c2 < 0
    add         r2, r2, #3          ; += 3
    subs        r9, r0, r5          ; d2 = a1_1 - d1_1
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #18]       ; op[9]

    addmi       r9, r9, #1          ; += d2 < 0
    add         r9, r9, #3          ; += 3
    smusd       r4, r4, lr          ; A2 = b1<<2 - c1<<2
    smusd       r8, r8, lr          ; C2 = b1<<2 - c1<<2
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #26]       ; op[13]


    ; op[2,6,10,14]
    add         r11, r4, r8         ; a1_2 = A2 + C2
    sub         r12, r4, r8         ; b1_2 = A2 - C2

    smusd       r6, r6, lr          ; B2 = b1<<2 - c1<<2
    smusd       r10, r10, lr        ; D2 = b1<<2 - c1<<2
    add         r4, r6, r10         ; d1_2 = B2 + D2
    sub         r8, r6, r10         ; c1_2 = B2 - D2

    adds        r2, r11, r4         ; a2 = a1_2 + d1_2
    addmi       r2, r2, #1          ; += a2 < 0
    add         r2, r2, #3          ; += 3
    adds        r9, r12, r8         ; b2 = b1_2 + c1_2
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #4]        ; op[2]

    addmi       r9, r9, #1          ; += b2 < 0
    add         r9, r9, #3          ; += 3
    subs        r2, r12, r8         ; c2 = b1_2 - c1_2
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #12]       ; op[6]

    addmi       r2, r2, #1          ; += c2 < 0
    add         r2, r2, #3          ; += 3
    subs        r9, r11, r4         ; d2 = a1_2 - d1_2
    mov         r2, r2, asr #3      ; >> 3
    strh        r2, [r1, #20]       ; op[10]

    addmi       r9, r9, #1          ; += d2 < 0
    add         r9, r9, #3          ; += 3
    mov         r9, r9, asr #3      ; >> 3
    strh        r9, [r1, #28]       ; op[14]


    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_short_walsh4x4_armv6|

c00040004
    DCD         0x00040004

    END