;
;  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vpx_variance_halfpixvar16x16_hv_media|

    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; vpx_variance_halfpixvar16x16_hv_media
;
; Variance of a 16x16 block between a source that is half-pixel
; interpolated both horizontally and vertically and a reference block.
;
; For every output pixel the predictor is built from four source pixels
;       a b      (row N,   columns k and k+1)
;       c d      (row N+1, columns k and k+1)
; as    x = (a + b + 1) >> 1
;       y = (c + d + 1) >> 1
;       z = (x + y + 1) >> 1
; and compared against the reference pixel at (N, k).
;
; In:
;   r0    unsigned char *src_ptr
;   r1    int            source_stride
;   r2    unsigned char *ref_ptr
;   r3    int            recon_stride
;   stack unsigned int  *sse        (fifth argument)
;
; Out:
;   r0    sse - ((sum * sum) >> 8)
;   *sse  sum of squared differences over the 256 pixels
;
; Memory read:
;   source:    17 rows x 17 bytes (rows N and N+1, byte offsets 0..16)
;   reference: 16 rows x 16 bytes
;   Words are loaded from offsets k and k+1 of the same row, so at least
;   one load of every pair is unaligned; the routine relies on the core
;   handling unaligned word loads.
;
; Register roles inside the loop:
;   r0  source row N            r9   source row N+1
;   r2  reference row N         r4-r7 scratch
;   r8  sum  (signed)           r11  sse
;   r10 0x80808080              r12  rows remaining
;   lr  constant 0
;
; Rounding average without widening:
;   ~b = 255 - b per byte, so UHSUB8(a, ~b) = (a + b - 255) >> 1, which
;   is ((a + b + 1) >> 1) - 128 modulo 256.  XOR with 0x80 adds the 128
;   back, leaving (a + b + 1) >> 1 in each byte lane.
;
; The four 4-pixel groups are left unrolled: each one interleaves
; different independent instructions (prefetches, pointer updates, the
; loop counter) between the dependent ones.
;-----------------------------------------------------------------------
|vpx_variance_halfpixvar16x16_hv_media| PROC

    stmfd   sp!, {r4-r12, lr}   ; 10 registers = 40 bytes, see sse load below

    pld     [r0, r1, lsl #0]    ; prefetch source row 1
    pld     [r2, r3, lsl #0]    ; prefetch reference row 1

    mov     r8, #0              ; initialize sum = 0
    ldr     r10, c80808080
    mov     r11, #0             ; initialize sse = 0
    mov     r12, #16            ; set loop counter to 16 (=block height)
    mov     lr, #0              ; constant zero
loop
    add     r9, r0, r1          ; pointer to pixels on the next row

    ; ---- 1st 4 pixels ------------------------------------------------
    ldr     r4, [r0, #0]        ; load source pixels a, row N
    ldr     r6, [r0, #1]        ; load source pixels b, row N
    ldr     r5, [r9, #0]        ; load source pixels c, row N+1
    ldr     r7, [r9, #1]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #0]        ; load 4 ref pixels
    eor     r4, r4, r10

    ; usub8 sets the per-byte GE flags; sel keeps a difference byte only
    ; where its GE flag is set and substitutes 0 (lr) elsewhere.
    usub8   r6, r4, r5          ; calculate difference
    pld     [r0, r1, lsl #1]    ; prefetch source row N+2
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    pld     [r2, r3, lsl #1]    ; prefetch reference row N+2
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    ; (plain add/sub as in the other three groups: nothing reads the
    ;  condition flags before the subs that feeds the loop branch)
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; ---- 2nd 4 pixels ------------------------------------------------
    ldr     r4, [r0, #4]        ; load source pixels a, row N
    ldr     r6, [r0, #5]        ; load source pixels b, row N
    ldr     r5, [r9, #4]        ; load source pixels c, row N+1

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ldr     r7, [r9, #5]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #4]        ; load 4 ref pixels
    eor     r4, r4, r10

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; ---- 3rd 4 pixels ------------------------------------------------
    ldr     r4, [r0, #8]        ; load source pixels a, row N
    ldr     r6, [r0, #9]        ; load source pixels b, row N
    ldr     r5, [r9, #8]        ; load source pixels c, row N+1

    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    ldr     r7, [r9, #9]        ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #8]        ; load 4 ref pixels
    eor     r4, r4, r10

    usub8   r6, r4, r5          ; calculate difference
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)

    ; ---- 4th 4 pixels ------------------------------------------------
    ldr     r4, [r0, #12]       ; load source pixels a, row N
    ldr     r6, [r0, #13]       ; load source pixels b, row N
    ldr     r5, [r9, #12]       ; load source pixels c, row N+1
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)
    ldr     r7, [r9, #13]       ; load source pixels d, row N+1

    ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10
    ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
    mvn     r7, r7
    uhsub8  r5, r5, r7
    eor     r5, r5, r10
    ; z = (x + y + 1) >> 1, interpolate half pixel values vertically
    mvn     r5, r5
    uhsub8  r4, r4, r5
    ldr     r5, [r2, #12]       ; load 4 ref pixels
    eor     r4, r4, r10

    ; The pointer updates below are plain adds: they leave the GE flags
    ; produced by usub8 intact for the following sel.
    usub8   r6, r4, r5          ; calculate difference
    add     r0, r0, r1          ; set src_ptr to next row
    sel     r7, r6, lr          ; select bytes with positive difference
    usub8   r6, r5, r4          ; calculate difference with reversed operands
    add     r2, r2, r3          ; set ref_ptr to next row
    sel     r6, r6, lr          ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr          ; calculate sum of positive differences
    usad8   r5, r6, lr          ; calculate sum of negative differences
    orr     r6, r6, r7          ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4          ; add positive differences to sum
    sub     r8, r8, r5          ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6              ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8      ; another two pixels to halfwords
    smlad   r11, r5, r5, r11    ; dual signed multiply, add and accumulate (1)
    subs    r12, r12, #1        ; one row done; Z flag feeds the bne below
    smlad   r11, r7, r7, r11    ; dual signed multiply, add and accumulate (2)

    bne     loop

    ; return stuff
    ; The prologue pushed 40 bytes, so the first stacked argument (sse)
    ; now sits at sp + 40.
    ldr     r6, [sp, #40]       ; get address of sse
    ; |sum| <= 256 * 255, so sum * sum fits in 32 bits as an unsigned
    ; value; the logical shift right below treats it as such.
    mul     r0, r8, r8          ; sum * sum
    str     r11, [r6]           ; store sse
    sub     r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}

    ENDP

; Per-byte 0x80 used to undo the -128 bias left by the uhsub8 averaging.
c80808080
    DCD     0x80808080

    END