;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_loop_filter_simple_horizontal_edge_armv6|
    EXPORT  |vp8_loop_filter_simple_vertical_edge_armv6|

    AREA    |.text|, CODE, READONLY     ; name this block of code

    ; Transpose a 4x4 block of bytes packed in four 32-bit words.
    ; input:  $a0, $a1, $a2, $a3 = the four rows (clobbered)
    ; output: $b0, $b1, $b2, $b3 = the four columns
    ; Byte lanes (little-endian, MSB..LSB) before/after:
    MACRO
    TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
    ; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
    ; a0: 03 02 01 00
    ; a1: 13 12 11 10
    ; a2: 23 22 21 20
    ; a3: 33 32 31 30
    ;     b3 b2 b1 b0

    ; extract the even bytes of each row ...
    uxtb16      $b1, $a1                    ; xx 12 xx 10
    uxtb16      $b0, $a0                    ; xx 02 xx 00
    uxtb16      $b3, $a3                    ; xx 32 xx 30
    uxtb16      $b2, $a2                    ; xx 22 xx 20
    orr         $b1, $b0, $b1, lsl #8       ; 12 02 10 00
    orr         $b3, $b2, $b3, lsl #8       ; 32 22 30 20

    ; ... then the odd bytes of each row ...
    uxtb16      $a1, $a1, ror #8            ; xx 13 xx 11
    uxtb16      $a3, $a3, ror #8            ; xx 33 xx 31
    uxtb16      $a0, $a0, ror #8            ; xx 03 xx 01
    uxtb16      $a2, $a2, ror #8            ; xx 23 xx 21
    orr         $a0, $a0, $a1, lsl #8       ; 13 03 11 01
    orr         $a2, $a2, $a3, lsl #8       ; 33 23 31 21

    ; ... and recombine halfwords into the four output columns
    pkhtb       $b2, $b3, $b1, asr #16      ; 32 22 12 02 -- p1
    pkhbt       $b0, $b1, $b3, lsl #16      ; 30 20 10 00 -- p3

    pkhtb       $b3, $a2, $a0, asr #16      ; 33 23 13 03 -- p0
    pkhbt       $b1, $a0, $a2, lsl #16      ; 31 21 11 01 -- p2
    MEND


; Register aliases used by both filter routines below.
src         RN  r0                      ; unsigned char *src_ptr
pstep       RN  r1                      ; int src_pixel_step (row stride)

;r0     unsigned char *src_ptr,
;r1     int src_pixel_step,
;r2     const char *blimit

;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; VP8 "simple" loop filter across a horizontal edge.
; src points at the first q0 pixel; p1/p0 lie above the edge (at -2*pstep
; and -pstep), q0/q1 below (at 0 and +pstep).  Runs 4 iterations of 4
; pixels each (16 pixels along the edge), using the ARMv6 SIMD byte ops
; to filter 4 lanes at a time.  blimit is replicated into all 4 byte
; lanes of r12 for the per-lane threshold compare.
    stmdb       sp!, {r4 - r11, lr}

    ldrb        r12, [r2]               ; blimit
    ldr         r3, [src, -pstep, lsl #1] ; p1
    ldr         r4, [src, -pstep]       ; p0
    ldr         r5, [src]               ; q0
    ldr         r6, [src, pstep]        ; q1
    orr         r12, r12, r12, lsl #8   ; blimit
    ldr         r2, c0x80808080         ; sign-bias constant for signed math
    orr         r12, r12, r12, lsl #16  ; blimit replicated in all 4 lanes
    mov         r9, #4                  ; loop count: 4 passes x 4 pixels = 16
    mov         lr, #0                  ; need 0 in a couple places

|simple_hnext8|
    ; vp8_simple_filter_mask()
    ; mask lane = 0xFF when abs(p0-q0)*2 + abs(p1-q1)/2 <= blimit, else 0

    uqsub8      r7, r3, r6              ; p1 - q1 (saturating, so max(x,0))
    uqsub8      r8, r6, r3              ; q1 - p1
    uqsub8      r10, r4, r5             ; p0 - q0
    uqsub8      r11, r5, r4             ; q0 - p0
    orr         r8, r8, r7              ; abs(p1 - q1)
    orr         r10, r10, r11           ; abs(p0 - q0)
    uqadd8      r10, r10, r10           ; abs(p0 - q0) * 2
    uhadd8      r8, r8, lr              ; abs(p1 - q1) >> 1
    uqadd8      r10, r10, r8            ; abs(p0 - q0)*2 + abs(p1 - q1)/2
    mvn         r8, #0                  ; r8 = 0xFFFFFFFF
    usub8       r10, r12, r10           ; compare to flimit. usub8 sets GE flags
    sel         r10, r8, lr             ; filter mask: F or 0 per lane
    cmp         r10, #0
    beq         simple_hskip_filter     ; skip filtering if all masks are 0x00

    ;vp8_simple_filter()
    ; XOR with 0x80 converts unsigned pixels to signed for qadd8/qsub8

    eor         r3, r3, r2              ; p1 offset to convert to a signed value
    eor         r6, r6, r2              ; q1 offset to convert to a signed value
    eor         r4, r4, r2              ; p0 offset to convert to a signed value
    eor         r5, r5, r2              ; q0 offset to convert to a signed value

    qsub8       r3, r3, r6              ; vp8_filter = p1 - q1
    qsub8       r6, r5, r4              ; q0 - p0
    qadd8       r3, r3, r6              ; += q0 - p0
    ldr         r7, c0x04040404         ; rounding constant for Filter1
    qadd8       r3, r3, r6              ; += q0 - p0
    ldr         r8, c0x03030303         ; rounding constant for Filter2
    qadd8       r3, r3, r6              ; vp8_filter = p1-q1 + 3*(q0-p0)
    ;STALL
    and         r3, r3, r10             ; vp8_filter &= mask

    qadd8       r7, r3, r7              ; Filter1 = vp8_filter + 4
    qadd8       r8, r3, r8              ; Filter2 = vp8_filter + 3

    ; three signed halving-adds with 0 == arithmetic shift right by 3
    shadd8      r7, r7, lr
    shadd8      r8, r8, lr
    shadd8      r7, r7, lr
    shadd8      r8, r8, lr
    shadd8      r7, r7, lr              ; Filter1 >>= 3
    shadd8      r8, r8, lr              ; Filter2 >>= 3

    qsub8       r5, r5, r7              ; u = q0 - Filter1
    qadd8       r4, r4, r8              ; u = p0 + Filter2
    eor         r5, r5, r2              ; *oq0 = u^0x80
    str         r5, [src]               ; store oq0 result
    eor         r4, r4, r2              ; *op0 = u^0x80
    str         r4, [src, -pstep]       ; store op0 result

|simple_hskip_filter|
    subs        r9, r9, #1
    addne       src, src, #4            ; next 4 pixels along the edge

    ldrne       r3, [src, -pstep, lsl #1] ; p1
    ldrne       r4, [src, -pstep]       ; p0
    ldrne       r5, [src]               ; q0
    ldrne       r6, [src, pstep]        ; q1

    bne         simple_hnext8

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_armv6|


;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; VP8 "simple" loop filter across a vertical edge.
; src points at the first q0 pixel; p1/p0 are the two bytes to its left.
; Each iteration loads p1,p0 (halfword at src-2) and q0,q1 (halfword at
; src) for 4 consecutive rows, packs them into words, and transposes the
; 4x4 block so the same 4-lane SIMD filter as the horizontal case can be
; applied.  4 iterations x 4 rows = 16 rows total.
    stmdb       sp!, {r4 - r11, lr}

    ldrb        r12, [r2]               ; r12: blimit
    ldr         r2, c0x80808080         ; sign-bias constant for signed math
    orr         r12, r12, r12, lsl #8

    ; load source data to r7, r8, r9, r10
    ldrh        r3, [src, #-2]          ; p1 p0 of row 0
    pld         [src, #23]              ; preload for next block
    ldrh        r4, [src], pstep        ; q0 q1 of row 0; advance to row 1
    orr         r12, r12, r12, lsl #16  ; blimit replicated in all 4 lanes

    ldrh        r5, [src, #-2]
    pld         [src, #23]
    ldrh        r6, [src], pstep

    pkhbt       r7, r3, r4, lsl #16     ; r7 = q1q0 p1p0 of row 0

    ldrh        r3, [src, #-2]
    pld         [src, #23]
    ldrh        r4, [src], pstep

    pkhbt       r8, r5, r6, lsl #16     ; r8 = q1q0 p1p0 of row 1

    ldrh        r5, [src, #-2]
    pld         [src, #23]
    ldrh        r6, [src], pstep
    mov         r11, #4                 ; loop count: 4 passes x 4 rows = 16

|simple_vnext8|
    ; vp8_simple_filter_mask() function
    pkhbt       r9, r3, r4, lsl #16     ; row 2
    pkhbt       r10, r5, r6, lsl #16    ; row 3

    ;transpose r7, r8, r9, r10 to r3, r4, r5, r6
    ; afterwards r3=p1, r4=p0, r5=q0, r6=q1 (one byte lane per row)
    TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6

    uqsub8      r7, r3, r6              ; p1 - q1
    uqsub8      r8, r6, r3              ; q1 - p1
    uqsub8      r9, r4, r5              ; p0 - q0
    uqsub8      r10, r5, r4             ; q0 - p0
    orr         r7, r7, r8              ; abs(p1 - q1)
    orr         r9, r9, r10             ; abs(p0 - q0)
    mov         r8, #0
    uqadd8      r9, r9, r9              ; abs(p0 - q0) * 2
    uhadd8      r7, r7, r8              ; abs(p1 - q1) / 2
    uqadd8      r7, r7, r9              ; abs(p0 - q0)*2 + abs(p1 - q1)/2
    mvn         r10, #0                 ; r10 == -1

    usub8       r7, r12, r7             ; compare to flimit; sets GE flags
    sel         lr, r10, r8             ; filter mask: F or 0 per lane

    cmp         lr, #0
    beq         simple_vskip_filter     ; skip filtering if all masks are 0x00

    ;vp8_simple_filter() function
    ; XOR with 0x80 converts unsigned pixels to signed for qadd8/qsub8
    eor         r3, r3, r2              ; p1 offset to convert to a signed value
    eor         r6, r6, r2              ; q1 offset to convert to a signed value
    eor         r4, r4, r2              ; p0 offset to convert to a signed value
    eor         r5, r5, r2              ; q0 offset to convert to a signed value

    qsub8       r3, r3, r6              ; vp8_filter = p1 - q1
    qsub8       r6, r5, r4              ; q0 - p0

    qadd8       r3, r3, r6              ; vp8_filter += q0 - p0
    ldr         r9, c0x03030303         ; r9 = 3

    qadd8       r3, r3, r6              ; vp8_filter += q0 - p0
    ldr         r7, c0x04040404

    qadd8       r3, r3, r6              ; vp8_filter = p1-q1 + 3*(q0-p0)
    ;STALL
    and         r3, r3, lr              ; vp8_filter &= mask

    qadd8       r9, r3, r9              ; Filter2 = vp8_filter + 3
    qadd8       r3, r3, r7              ; Filter1 = vp8_filter + 4

    ; three signed halving-adds with 0 == arithmetic shift right by 3
    shadd8      r9, r9, r8
    shadd8      r3, r3, r8
    shadd8      r9, r9, r8
    shadd8      r3, r3, r8
    shadd8      r9, r9, r8              ; Filter2 >>= 3
    shadd8      r3, r3, r8              ; Filter1 >>= 3

    ;calculate output
    sub         src, src, pstep, lsl #2 ; rewind to the first of the 4 rows

    qadd8       r4, r4, r9              ; u = p0 + Filter2
    qsub8       r5, r5, r3              ; u = q0 - Filter1
    eor         r4, r4, r2              ; *op0 = u^0x80
    eor         r5, r5, r2              ; *oq0 = u^0x80

    ; scatter the 4 filtered p0/q0 byte lanes back, one row at a time
    strb        r4, [src, #-1]          ; store the result
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    mov         r4, r4, lsr #8
    strb        r5, [src], pstep
    mov         r5, r5, lsr #8

    strb        r4, [src, #-1]
    strb        r5, [src], pstep

|simple_vskip_filter|
    subs        r11, r11, #1

    ; load source data to r7, r8, r9, r10 for the next 4 rows
    ; (the two pkhbt below run unconditionally, but on the final pass the
    ; loop exits at bne so the stale packed values are never consumed)
    ldrneh      r3, [src, #-2]
    pld         [src, #23]              ; preload for next block
    ldrneh      r4, [src], pstep

    ldrneh      r5, [src, #-2]
    pld         [src, #23]
    ldrneh      r6, [src], pstep

    pkhbt       r7, r3, r4, lsl #16

    ldrneh      r3, [src, #-2]
    pld         [src, #23]
    ldrneh      r4, [src], pstep

    pkhbt       r8, r5, r6, lsl #16

    ldrneh      r5, [src, #-2]
    pld         [src, #23]
    ldrneh      r6, [src], pstep

    bne         simple_vnext8

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_loop_filter_simple_vertical_edge_armv6|

; Constant Pool
c0x80808080 DCD 0x80808080
c0x03030303 DCD 0x03030303
c0x04040404 DCD 0x04040404

    END