;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

; Syntax: ARM assembler (armasm) source, ARM-state ARMv6 instructions
; (PKHBT/PKHTB, SMUAD/SMUADX, USAT). Both routines save r4-r11 and lr on entry
; and return by popping the saved lr into pc.

    EXPORT  |vp8_filter_block2d_bil_first_pass_armv6|
    EXPORT  |vp8_filter_block2d_bil_second_pass_armv6|

    AREA    |.text|, CODE, READONLY  ; name this block of code

;-------------------------------------
; vp8_filter_block2d_bil_first_pass_armv6
;
; First (horizontal) pass of a 2-tap filter: every output sample is
;     usat16((src[x] * tap_lo + src[x + 1] * tap_hi + 0x40) >> 7)
; where tap_lo / tap_hi are the low / high halfwords of the 32-bit word loaded
; from vp8_filter (vp8_filter[0] / vp8_filter[1] on a little-endian target --
; NOTE(review): confirm endianness assumption).
;
; Arguments:
; r0    unsigned char  *src_ptr,
; r1    unsigned short *dst_ptr,
; r2    unsigned int    src_pitch,
; r3    unsigned int    height,
; stack unsigned int    width,
; stack const short    *vp8_filter
;
; The output is stored transposed in the output array to make the second
; filtering pass easy: samples of one input row are written (height*2 + 2)
; bytes apart, and each new input row starts 2 bytes after the previous one.
;
; If the loaded filter word equals 128 (low halfword 128, high halfword 0) the
; multiply is skipped and source bytes are copied, widened to 16 bits.
;
; Register roles after the set-up code:
;   r0  source read pointer           r1  destination write pointer
;   r2  src_pitch - width             r3  height*2 + 2 (store step, bytes)
;   r4  width                         r5  packed filter word
;   r6-r10 source pixels / results    r11 dst start for the current input row
;   r12 input rows remaining          lr  groups of 4 pixels remaining in row
;
; Unchecked preconditions: both loops test their counter at the bottom, so
; height must be >= 1 and width >= 4; only (width >> 2) * 4 pixels per row are
; processed. The filtering path reads one source byte past each group of four
; (src[4]); the copy path does not.
;
; Clobbers r0-r3, r12 and the condition flags; r4-r11 are restored.
;-------------------------------------
|vp8_filter_block2d_bil_first_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers pushed = 36 bytes

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack argument)
    ldr     r4, [sp, #36]                   ; width (1st stack argument, just above the 36 saved bytes)

    mov     r12, r3                         ; outer-loop counter = height

    add     r7, r2, r4                      ; preload hint at src + src_pitch + width
    pld     [r0, r7]

    sub     r2, r2, r4                      ; src increment for height loop (pitch - width)

    ldr     r5, [r11]                       ; load both 16-bit filter coefficients as one word

    mov     r3, r3, lsl #1                  ; height*2
    add     r3, r3, #2                      ; +2 bytes of padding per output row; presumably keeps rows
                                            ; 4-byte aligned because callers pass (height+1) here -- TODO confirm

    mov     r11, r1                         ; save dst_ptr for each row

    cmp     r5, #128                        ; if filter word == 128, then skip the filter
    beq     bil_null_1st_filter

|bil_height_loop_1st_v6|
    ldrb    r6, [r0]                        ; load source data: src[0..2]
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    mov     lr, r4, lsr #2                  ; 4-in-parallel loop counter = width / 4

|bil_width_loop_1st_v6|
    ldrb    r9, [r0, #3]
    ldrb    r10, [r0, #4]                   ; src[4]: right neighbour of the 4th pixel

    pkhbt   r6, r6, r7, lsl #16             ; src[1] | src[0]   (high half | low half)
    pkhbt   r7, r7, r8, lsl #16             ; src[2] | src[1]

    smuad   r6, r6, r5                      ; apply the filter: lo*lo + hi*hi
    pkhbt   r8, r8, r9, lsl #16             ; src[3] | src[2]
    smuad   r7, r7, r5
    pkhbt   r9, r9, r10, lsl #16            ; src[4] | src[3]

    smuad   r8, r8, r5
    smuad   r9, r9, r5

    add     r0, r0, #4
    subs    lr, lr, #1                      ; sets Z for the ldrne*/bne below; nothing in between
                                            ; uses an S-suffixed (flag-setting) form

    add     r6, r6, #0x40                   ; round_shift_and_clamp: (x + 64) >> 7, saturated to 16 bits
    add     r7, r7, #0x40
    usat    r6, #16, r6, asr #7
    usat    r7, #16, r7, asr #7

    strh    r6, [r1], r3                    ; result is transposed and stored (post-increment by r3)

    add     r8, r8, #0x40                   ; round_shift_and_clamp
    strh    r7, [r1], r3
    add     r9, r9, #0x40
    usat    r8, #16, r8, asr #7
    usat    r9, #16, r9, asr #7

    strh    r8, [r1], r3                    ; result is transposed and stored

    ldrneb  r6, [r0]                        ; load source data for the next group (only if looping again)
    strh    r9, [r1], r3

    ldrneb  r7, [r0, #1]
    ldrneb  r8, [r0, #2]

    bne     bil_width_loop_1st_v6

    add     r0, r0, r2                      ; move to next input row
    subs    r12, r12, #1                    ; sets Z for the bne at the bottom of the row loop

    add     r9, r2, r4, lsl #1              ; adding back block width: (pitch - width) + 2*width
    pld     [r0, r9]                        ; preload hint for the following row

    add     r11, r11, #2                    ; move over to next column of the transposed output
    mov     r1, r11

    bne     bil_height_loop_1st_v6

    ldmia   sp!, {r4 - r11, pc}

; Pass-through path: copy source bytes to the 16-bit output, still transposed.
|bil_null_1st_filter|
|bil_height_loop_null_1st|
    mov     lr, r4, lsr #2                  ; loop counter = width / 4

|bil_width_loop_null_1st|
    ldrb    r6, [r0]                        ; load data
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    ldrb    r9, [r0, #3]

    strh    r6, [r1], r3                    ; store it to the intermediate buffer
    add     r0, r0, #4
    strh    r7, [r1], r3
    subs    lr, lr, #1                      ; sets Z for the bne below (strh does not alter flags)
    strh    r8, [r1], r3
    strh    r9, [r1], r3

    bne     bil_width_loop_null_1st

    subs    r12, r12, #1
    add     r0, r0, r2                      ; move to next input line
    add     r11, r11, #2                    ; move over to next column
    mov     r1, r11

    bne     bil_height_loop_null_1st

    ldmia   sp!, {r4 - r11, pc}

    ENDP  ; |vp8_filter_block2d_bil_first_pass_armv6|


;---------------------------------
; vp8_filter_block2d_bil_second_pass_armv6
;
; Second pass of the 2-tap filter. It reads the transposed 16-bit buffer,
; computes
;     usat8((src[i] * tap_lo + src[i + 1] * tap_hi + 0x40) >> 7)
; on consecutive 16-bit samples, and stores bytes dst_pitch apart, which
; transposes the data back.
;
; Arguments:
; r0    unsigned short *src_ptr,
; r1    unsigned char  *dst_ptr,
; r2    int             dst_pitch,
; r3    unsigned int    height,
; stack unsigned int    width,
; stack const short    *vp8_filter
;
; If the loaded filter word equals 128 the multiply is skipped and the low
; byte of each 16-bit sample is copied.
;
; Register roles after the set-up code:
;   r0  source read pointer           r1  destination write pointer
;   r2  dst_pitch (store step)        r3  height
;   r5  packed filter word            r6-r10 packed samples / results
;   r11 dst start for current column  r12 source rows remaining (= width)
;   lr  groups of 4 samples remaining in the current source row
;
; Each source row advances r0 by 8 * (height >> 2) + 4 bytes. This equals the
; first pass's (height*2 + 2) store step when that pass was given height + 1
; rows and height is a multiple of 4 -- NOTE(review): confirm against caller.
;
; Unchecked preconditions: loops test at the bottom, so width must be >= 1 and
; height >= 4. Samples are fetched with word loads (ldr) from src_ptr.
;
; Clobbers r0, r1, r12 and the condition flags; r4-r11 are restored.
;---------------------------------
|vp8_filter_block2d_bil_second_pass_armv6| PROC
    stmdb   sp!, {r4 - r11, lr}             ; 9 registers pushed = 36 bytes

    ldr     r11, [sp, #40]                  ; vp8_filter address (2nd stack argument)
    ldr     r4, [sp, #36]                   ; width (1st stack argument)

    ldr     r5, [r11]                       ; load both 16-bit filter coefficients as one word
    mov     r12, r4                         ; outer-loop counter = width, since we work on transposed data matrix
    mov     r11, r1                         ; save dst_ptr for each output column

    cmp     r5, #128                        ; if filter word == 128, then skip the filter
    beq     bil_null_2nd_filter

|bil_height_loop_2nd|
    ldr     r6, [r0]                        ; load the data: src[1] | src[0]
    ldr     r8, [r0, #4]                    ; src[3] | src[2]
    ldrh    r10, [r0, #8]                   ; src[4]
    mov     lr, r3, lsr #2                  ; loop counter = height / 4

|bil_width_loop_2nd|
    pkhtb   r7, r6, r8                      ; src[1] | src[2]   (high half | low half)
    pkhtb   r9, r8, r10                     ; src[3] | src[4]

    smuad   r6, r6, r5                      ; apply filter: src[0]*lo + src[1]*hi
    smuad   r8, r8, r5                      ; apply filter: src[2]*lo + src[3]*hi

    subs    lr, lr, #1                      ; sets Z for the ldrne*/bne below; nothing in between
                                            ; uses an S-suffixed (flag-setting) form

    smuadx  r7, r7, r5                      ; apply filter, taps exchanged: src[1]*lo + src[2]*hi
    smuadx  r9, r9, r5                      ; apply filter, taps exchanged: src[3]*lo + src[4]*hi

    add     r0, r0, #8

    add     r6, r6, #0x40                   ; round_shift_and_clamp: (x + 64) >> 7, saturated to 8 bits
    add     r7, r7, #0x40
    usat    r6, #8, r6, asr #7
    usat    r7, #8, r7, asr #7
    strb    r6, [r1], r2                    ; the result is transposed back and stored

    add     r8, r8, #0x40                   ; round_shift_and_clamp
    strb    r7, [r1], r2
    add     r9, r9, #0x40
    usat    r8, #8, r8, asr #7
    usat    r9, #8, r9, asr #7
    strb    r8, [r1], r2                    ; the result is transposed back and stored

    ldrne   r6, [r0]                        ; load data for the next group (only if looping again)
    strb    r9, [r1], r2
    ldrne   r8, [r0, #4]
    ldrneh  r10, [r0, #8]

    bne     bil_width_loop_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; update src for next row (skip the trailing 4 bytes)
    add     r11, r11, #1                    ; next output column
    mov     r1, r11

    bne     bil_height_loop_2nd
    ldmia   sp!, {r4 - r11, pc}

; Pass-through path: copy the low byte of each 16-bit sample, transposing back.
|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
    mov     lr, r3, lsr #2                  ; loop counter = height / 4

|bil_width_loop_null_2nd|
    ldr     r6, [r0], #4                    ; load data: src[1] | src[0], post-increment
    subs    lr, lr, #1                      ; sets Z for the bne below
    ldr     r8, [r0], #4                    ; src[3] | src[2]

    strb    r6, [r1], r2                    ; store data: low byte of src[0]
    mov     r7, r6, lsr #16                 ; r7 = src[1]
    strb    r7, [r1], r2
    mov     r9, r8, lsr #16                 ; r9 = src[3]
    strb    r8, [r1], r2                    ; low byte of src[2]
    strb    r9, [r1], r2

    bne     bil_width_loop_null_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                      ; update src for next row (skip the trailing 4 bytes)
    add     r11, r11, #1                    ; next output column
    mov     r1, r11

    bne     bil_height_loop_null_2nd

    ldmia   sp!, {r4 - r11, pc}
    ENDP  ; |vp8_filter_block2d_bil_second_pass_armv6|

    END