;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT |vp8_yv12_extend_frame_borders_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE vpx_scale_asm_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_yv12_extend_frame_borders_neon (YV12_BUFFER_CONFIG *ybf);
;
; Fills the border area around the Y, U and V planes of a frame buffer
; by replicating the edge pixels of each plane:
;   1. left/right : for every row, the first pixel is replicated into
;                   the Border bytes before the row and the last pixel
;                   into the Border bytes after it;
;   2. top/bottom : the first and last rows (starting Border bytes
;                   before the plane, i.e. including the side borders
;                   written in step 1) are copied into the Border rows
;                   above and below the plane.
; Border is 32 for the Y plane and 16 for the U and V planes; the code
; depends on VP8BORDERINPIXELS being 32.
;
; In:     r0 = ybf; fields are read through the
;              yv12_buffer_config_* offsets from
;              vpx_scale_asm_offsets.asm
; Out:    nothing (the borders are written in memory)
; Saved:  r4 - r10, lr and d8 - d15 are pushed on entry and restored
;         on exit
; Clobb:  r1 - r3, r12, d0 - d7, d16 - d31, flags
;
; Register roles inside each pass:
;   r1  = src_ptr1   (left column / top row)
;   r2  = src_ptr2   (right column / bottom row)
;   r5  = dest_ptr1  (left border / top border)
;   r6  = dest_ptr2  (right border / bottom border)
;   r3  = plane_width, r4 = plane_height, lr = plane_stride
;   r12 = outer loop counter, r7 / r9 = inner loop counters
;   r10 = number of chroma planes still to process
;
; Size assumptions implied by the loops (not checked here):
;   - the left/right loops handle 4 rows (Y) or 8 rows (U, V) per
;     iteration and test the counter after decrementing it, so the
;     plane height is expected to be a non-zero multiple of 4 (Y) or
;     8 (U, V);
;   - the top/bottom copy moves plane_stride bytes per row in blocks of
;     128 + 16 bytes (Y) or 64 + 8 bytes (U, V), so a stride remainder
;     smaller than 16 (Y) or 8 (U, V) bytes is not copied.
;-----------------------------------------------------------------------

|vp8_yv12_extend_frame_borders_neon| PROC
    push            {r4 - r10, lr}
    vpush           {d8 - d15}

    ; Border = 32
    ldr             r3, [r0, #yv12_buffer_config_y_width]   ; plane_width
    ldr             r1, [r0, #yv12_buffer_config_y_buffer]  ; src_ptr1
    ldr             r4, [r0, #yv12_buffer_config_y_height]  ; plane_height
    ldr             lr, [r0, #yv12_buffer_config_y_stride]  ; plane_stride

; Border copy for Y plane
; copy the left and right most columns out
    add             r6, r1, r3              ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
    sub             r2, r6, #1              ; src_ptr2 = src_ptr1 + plane_width - 1
    sub             r5, r1, #32             ; dest_ptr1 = src_ptr1 - Border

    mov             r12, r4, lsr #2         ; plane_height / 4 (4 rows per iteration)

copy_left_right_y
    ; Load one edge byte per row and replicate it across a q register:
    ; even loads take the left pixel (r1), odd loads the right pixel (r2).
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d4[], d5[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d12[], d13[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d20[], d21[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d28[], d29[]}, [r2], lr

    ; Duplicate each 16-byte splat so that a register pair holds the
    ; full 32-byte border for one row.
    vmov            q1, q0
    vmov            q3, q2
    vmov            q5, q4
    vmov            q7, q6
    vmov            q9, q8
    vmov            q11, q10
    vmov            q13, q12
    vmov            q15, q14

    subs            r12, r12, #1            ; the vst1 below do not touch the flags

    vst1.8          {q0, q1}, [r5], lr      ; left border, row 0
    vst1.8          {q2, q3}, [r6], lr      ; right border, row 0
    vst1.8          {q4, q5}, [r5], lr
    vst1.8          {q6, q7}, [r6], lr
    vst1.8          {q8, q9}, [r5], lr
    vst1.8          {q10, q11}, [r6], lr
    vst1.8          {q12, q13}, [r5], lr
    vst1.8          {q14, q15}, [r6], lr

    bne             copy_left_right_y

;Now copy the top and bottom source lines into each line of the respective borders
    ldr             r1, [r0, #yv12_buffer_config_y_buffer]  ; y_buffer
    mul             r8, r4, lr              ; plane_height * plane_stride

    ; copy width is plane_stride
    movs            r12, lr, lsr #7         ; plane_stride / 128; Z set when no full block

    ; None of the next four instructions updates the flags.
    sub             r1, r1, #32             ; src_ptr1 = y_buffer - Border
    add             r6, r1, r8              ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
    sub             r2, r6, lr              ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
    sub             r5, r1, lr, asl #5      ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
    ; plane_stride < 128: skip the 128-byte loop. beq is used because
    ; movs with a shifted operand updates N, Z and C but leaves V as it
    ; was, so only Z reliably describes the shift result here.
    beq             extra_y_copy_needed

copy_top_bottom_y
    ; Read 128 bytes of the top row (r1) and of the bottom row (r2).
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q4, q5}, [r1]!
    vld1.8          {q12, q13}, [r2]!
    vld1.8          {q6, q7}, [r1]!
    vld1.8          {q14, q15}, [r2]!

    mov             r7, #32                 ; Border rows to fill

top_bottom_32
    subs            r7, r7, #1              ; flags survive until the bne below

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!
    vst1.8          {q4, q5}, [r5]!
    vst1.8          {q12, q13}, [r6]!
    vst1.8          {q6, q7}, [r5]!
    vst1.8          {q14, q15}, [r6]!

    ; Step to the same 128-byte column in the next border row.
    add             r5, r5, lr              ; dest_ptr1 += plane_stride
    sub             r5, r5, #128            ; dest_ptr1 -= 128
    add             r6, r6, lr              ; dest_ptr2 += plane_stride
    sub             r6, r6, #128            ; dest_ptr2 -= 128

    bne             top_bottom_32

    ; r1 / r2 already point at the next 128-byte column; rebuild the
    ; destination pointers from them.
    sub             r5, r1, lr, asl #5      ; src_ptr1 - (Border * plane_stride)
    add             r6, r2, lr              ; src_ptr2 + plane_stride

    subs            r12, r12, #1
    bne             copy_top_bottom_y

extra_y_copy_needed
    mov             r7, lr, lsr #4          ; check to see if extra copy is needed
    ands            r7, r7, #0x7            ; r7 = 16-byte blocks left in (plane_stride % 128)
    bne             extra_top_bottom_y
end_of_border_copy_y

;Border copy for U, V planes
; Border = 16
    ldr             r7, [r0, #yv12_buffer_config_u_buffer]  ; src_ptr1
    ldr             lr, [r0, #yv12_buffer_config_uv_stride] ; plane_stride
    ldr             r3, [r0, #yv12_buffer_config_uv_width]  ; plane_width
    ldr             r4, [r0, #yv12_buffer_config_uv_height] ; plane_height

    mov             r10, #2                 ; two passes: u_buffer, then v_buffer

;copy the left and right most columns out
border_copy_uv
    mov             r1, r7                  ; src_ptr1 needs to be saved for second half of loop
    sub             r5, r1, #16             ; dest_ptr1 = src_ptr1 - Border
    add             r6, r1, r3              ; dest_ptr2 = src_ptr2 + 1 (src_ptr1 + plane_width)
    sub             r2, r6, #1              ; src_ptr2 = src_ptr1 + plane_width - 1

    mov             r12, r4, lsr #3         ; plane_height / 8 (8 rows per iteration)

copy_left_right_uv
    ; One q register already covers the 16-byte border, so no vmov
    ; duplication is needed: even q registers hold left pixels (r1),
    ; odd q registers hold right pixels (r2).
    vld1.8          {d0[], d1[]}, [r1], lr
    vld1.8          {d2[], d3[]}, [r2], lr
    vld1.8          {d4[], d5[]}, [r1], lr
    vld1.8          {d6[], d7[]}, [r2], lr
    vld1.8          {d8[], d9[]}, [r1], lr
    vld1.8          {d10[], d11[]}, [r2], lr
    vld1.8          {d12[], d13[]}, [r1], lr
    vld1.8          {d14[], d15[]}, [r2], lr
    vld1.8          {d16[], d17[]}, [r1], lr
    vld1.8          {d18[], d19[]}, [r2], lr
    vld1.8          {d20[], d21[]}, [r1], lr
    vld1.8          {d22[], d23[]}, [r2], lr
    vld1.8          {d24[], d25[]}, [r1], lr
    vld1.8          {d26[], d27[]}, [r2], lr
    vld1.8          {d28[], d29[]}, [r1], lr
    vld1.8          {d30[], d31[]}, [r2], lr

    subs            r12, r12, #1            ; the vst1 below do not touch the flags

    vst1.8          {q0}, [r5], lr
    vst1.8          {q1}, [r6], lr
    vst1.8          {q2}, [r5], lr
    vst1.8          {q3}, [r6], lr
    vst1.8          {q4}, [r5], lr
    vst1.8          {q5}, [r6], lr
    vst1.8          {q6}, [r5], lr
    vst1.8          {q7}, [r6], lr
    vst1.8          {q8}, [r5], lr
    vst1.8          {q9}, [r6], lr
    vst1.8          {q10}, [r5], lr
    vst1.8          {q11}, [r6], lr
    vst1.8          {q12}, [r5], lr
    vst1.8          {q13}, [r6], lr
    vst1.8          {q14}, [r5], lr
    vst1.8          {q15}, [r6], lr

    bne             copy_left_right_uv

;Now copy the top and bottom source lines into each line of the respective borders
    mov             r1, r7                  ; restart from the saved plane pointer
    mul             r8, r4, lr              ; plane_height * plane_stride
    movs            r12, lr, lsr #6         ; plane_stride / 64; Z set when no full block

    ; None of the next four instructions updates the flags.
    sub             r1, r1, #16             ; src_ptr1 = u_buffer - Border
    add             r6, r1, r8              ; dest_ptr2 = src_ptr2 + plane_stride (src_ptr1 + (plane_height * plane_stride))
    sub             r2, r6, lr              ; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride
    sub             r5, r1, lr, asl #4      ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
    ; plane_stride < 64: skip the 64-byte loop. beq is used because
    ; movs with a shifted operand leaves V as it was, so only Z
    ; reliably describes the shift result here.
    beq             extra_uv_copy_needed

copy_top_bottom_uv
    ; Read 64 bytes of the top row (r1) and of the bottom row (r2).
    vld1.8          {q0, q1}, [r1]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q2, q3}, [r1]!
    vld1.8          {q10, q11}, [r2]!

    mov             r7, #16                 ; Border rows to fill (r7 no longer holds the plane pointer)

top_bottom_16
    subs            r7, r7, #1              ; flags survive until the bne below

    vst1.8          {q0, q1}, [r5]!
    vst1.8          {q8, q9}, [r6]!
    vst1.8          {q2, q3}, [r5]!
    vst1.8          {q10, q11}, [r6]!

    ; Step to the same 64-byte column in the next border row.
    add             r5, r5, lr              ; dest_ptr1 += plane_stride
    sub             r5, r5, #64             ; dest_ptr1 -= 64
    add             r6, r6, lr              ; dest_ptr2 += plane_stride
    sub             r6, r6, #64             ; dest_ptr2 -= 64

    bne             top_bottom_16

    sub             r5, r1, lr, asl #4      ; dest_ptr1 = src_ptr1 - (Border * plane_stride)
    add             r6, r2, lr              ; dest_ptr2 = src_ptr2 + plane_stride

    subs            r12, r12, #1
    bne             copy_top_bottom_uv
extra_uv_copy_needed
    mov             r7, lr, lsr #3          ; check to see if extra copy is needed
    ands            r7, r7, #0x7            ; r7 = 8-byte blocks left in (plane_stride % 64)
    bne             extra_top_bottom_uv

end_of_border_copy_uv
    subs            r10, r10, #1
    ldrne           r7, [r0, #yv12_buffer_config_v_buffer]  ; src_ptr1 for the second pass
    bne             border_copy_uv

    vpop            {d8 - d15}
    pop             {r4 - r10, pc}

;;;;;;;;;;;;;;;;;;;;;;
; Y plane tail: copies the remaining 16-byte columns of the top and
; bottom rows. On entry r7 = number of 16-byte columns left (non-zero).
extra_top_bottom_y
    vld1.8          {q0}, [r1]!             ; 16 bytes of the top row
    vld1.8          {q2}, [r2]!             ; 16 bytes of the bottom row

    mov             r9, #4                  ; 32 >> 3 (8 border rows per iteration)

extra_top_bottom_32
    subs            r9, r9, #1              ; flags survive until the bne below

    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    vst1.8          {q0}, [r5], lr
    vst1.8          {q2}, [r6], lr
    bne             extra_top_bottom_32

    sub             r5, r1, lr, asl #5      ; src_ptr1 - (Border * plane_stride)
    add             r6, r2, lr              ; src_ptr2 + plane_stride
    subs            r7, r7, #1
    bne             extra_top_bottom_y

    b               end_of_border_copy_y

; U/V plane tail: copies the remaining 8-byte columns of the top and
; bottom rows. On entry r7 = number of 8-byte columns left (non-zero).
extra_top_bottom_uv
    vld1.8          {d0}, [r1]!             ; 8 bytes of the top row
    vld1.8          {d8}, [r2]!             ; 8 bytes of the bottom row

    mov             r9, #2                  ; 16 >> 3 (8 border rows per iteration)

extra_top_bottom_16
    subs            r9, r9, #1              ; flags survive until the bne below

    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    vst1.8          {d0}, [r5], lr
    vst1.8          {d8}, [r6], lr
    bne             extra_top_bottom_16

    sub             r5, r1, lr, asl #4      ; src_ptr1 - (Border * plane_stride)
    add             r6, r2, lr              ; src_ptr2 + plane_stride
    subs            r7, r7, #1
    bne             extra_top_bottom_uv

    b               end_of_border_copy_uv

    ENDP
    END