;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_yv12_copy_frame_func_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE vpx_scale_asm_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2

;-----------------------------------------------------------------------
; void vp8_yv12_copy_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
;                                    YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y, U and V planes of src_ybc into dst_ybc with NEON
; loads/stores, two rows per pass.
;
; In:     r0 = src_ybc, r1 = dst_ybc
; Out:    none
; Saved:  r4-r11, lr and d8-d15 are pushed on entry and restored on exit
; Stack:  16 bytes of scratch holding the four chroma plane pointers
;
; Register roles inside the copy loops:
;   r2 / r3    source / destination pointer of the current row pair
;   r4         plane height          r5   plane width
;   r6 / r7    source / destination stride
;   r8 / r9    source / destination cursor, first row of the pair
;   r10 / r11  source / destination cursor, second row (bulk loops)
;   r0 / r11   source / destination cursor, second row (remainder loops)
;   r12        block or byte counter for the current row pair
;   lr         number of row pairs still to copy
;
; Notes (all follow from the code below):
;   - Dimensions come from src_ybc only (y_height, y_width, y_stride);
;     dst_ybc contributes its y_stride and its three plane pointers.
;   - Chroma height, width and strides are obtained by halving the luma
;     values; the uv_* fields of the structures are not read.
;   - Rows are copied in pairs (height >> 1 passes), so an odd trailing
;     row is not copied. A plane with fewer than two rows is skipped.
;   - The remainder loops count down by 16 (Y) or 8 (U, V) and stop only
;     on exactly zero, so the luma width must be a multiple of 16 and
;     the chroma width a multiple of 8.
;-----------------------------------------------------------------------
|vp8_yv12_copy_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    sub             sp, sp, #16                 ; scratch for the chroma pointers

    ; Fetch the chroma plane pointers up front: r0 and r1 are reused as
    ; scratch registers further down.
    ldr             r8, [r0, #yv12_buffer_config_u_buffer]       ;src U
    ldr             r9, [r1, #yv12_buffer_config_u_buffer]       ;dst U
    ldr             r10, [r0, #yv12_buffer_config_v_buffer]      ;src V
    ldr             r11, [r1, #yv12_buffer_config_v_buffer]      ;dst V

    ;Copy Y plane
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]       ;src stride
    ldr             r7, [r1, #yv12_buffer_config_y_stride]       ;dst stride
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;src Y
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dst Y

    str             r8, [sp]                    ; [sp]      = src U
    str             r9, [sp, #4]                ; [sp, #4]  = dst U
    str             r10, [sp, #8]               ; [sp, #8]  = src V
    str             r11, [sp, #12]              ; [sp, #12] = dst V

    ; copy two rows at one time
    movs            lr, r4, lsr #1
    beq             end_of_cp_src_to_dst        ; fewer than two rows: the
                                                ; counter would wrap below

cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    movs            r12, r5, lsr #7             ; number of 128-byte blocks
    ; MOVS with a shift updates N, Z and C only. Test Z alone so that a
    ; stale V flag cannot divert wide rows to the remainder path.
    beq             extra_cp_needed             ; y_width < 128

cp_src_to_dst_width_loop
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!
    vld1.8          {q4, q5}, [r8]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q6, q7}, [r8]!
    vld1.8          {q14, q15}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!
    vst1.8          {q4, q5}, [r9]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q6, q7}, [r9]!
    vst1.8          {q14, q15}, [r11]!

    bne             cp_src_to_dst_width_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1          ; advance two source rows
    add             r3, r3, r7, lsl #1          ; advance two destination rows

    bne             cp_src_to_dst_height_loop

extra_cp_needed
    ands            r10, r5, #0x7f              ;check to see if extra copy is needed
    sub             r11, r5, r10                ; bytes already covered per row
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;src Y
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dst Y
    bne             extra_cp_src_to_dst_width
end_of_cp_src_to_dst

    ;Copy U & V planes
    ldr             r2, [sp]                    ;src U
    ldr             r3, [sp, #4]                ;dst U
    mov             r4, r4, lsr #1              ;src uv_height
    mov             r5, r5, lsr #1              ;src uv_width
    mov             r6, r6, lsr #1              ;src uv_stride
    mov             r7, r7, lsr #1              ;dst uv_stride

    mov             r1, #2                      ; two chroma planes to copy

cp_uv_loop

    ;copy two rows at one time
    movs            lr, r4, lsr #1
    beq             end_of_cp_src_to_dst_uv     ; fewer than two rows: the
                                                ; counter would wrap below

cp_src_to_dst_height_uv_loop
    mov             r8, r2
    mov             r9, r3
    add             r10, r2, r6
    add             r11, r3, r7
    movs            r12, r5, lsr #6             ; number of 64-byte blocks
    beq             extra_uv_cp_needed          ; Z only, see the Y plane test

cp_src_to_dst_width_uv_loop
    vld1.8          {q0, q1}, [r8]!
    vld1.8          {q8, q9}, [r10]!
    vld1.8          {q2, q3}, [r8]!
    vld1.8          {q10, q11}, [r10]!

    subs            r12, r12, #1

    vst1.8          {q0, q1}, [r9]!
    vst1.8          {q8, q9}, [r11]!
    vst1.8          {q2, q3}, [r9]!
    vst1.8          {q10, q11}, [r11]!

    bne             cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1
    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             cp_src_to_dst_height_uv_loop

extra_uv_cp_needed
    ands            r10, r5, #0x3f              ;check to see if extra copy is needed
    sub             r11, r5, r10                ; bytes already covered per row
    ldr             r2, [sp]                    ;src plane start
    ldr             r3, [sp, #4]                ;dst plane start
    bne             extra_cp_src_to_dst_uv_width
end_of_cp_src_to_dst_uv

    subs            r1, r1, #1

    ; After the first plane, drop the U pointer pair so that [sp] and
    ; [sp, #4] address the V pointer pair.
    addne           sp, sp, #8

    ldrne           r2, [sp]                    ;src V
    ldrne           r3, [sp, #4]                ;dst V

    bne             cp_uv_loop

    add             sp, sp, #8                  ; release the rest of the scratch

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

;=============================
; Y plane remainder: copies the last r10 (= y_width & 0x7f) bytes of
; every row, 16 bytes at a time. r11 holds the offset of those bytes
; within the row on entry. r0 is overwritten here; src_ybc is not
; needed afterwards.
extra_cp_src_to_dst_width
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_loop
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6
    add             r11, r9, r7

    mov             r12, r10

extra_cp_src_to_dst_width_loop
    vld1.8          {q0}, [r8]!
    vld1.8          {q1}, [r0]!

    subs            r12, r12, #16

    vst1.8          {q0}, [r9]!
    vst1.8          {q1}, [r11]!
    bne             extra_cp_src_to_dst_width_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_loop

    b               end_of_cp_src_to_dst

;=================================
; U / V plane remainder: copies the last r10 (= uv_width & 0x3f) bytes
; of every row, 8 bytes at a time. r11 holds the offset of those bytes
; within the row on entry.
extra_cp_src_to_dst_uv_width
    add             r2, r2, r11
    add             r3, r3, r11

    mov             lr, r4, lsr #1
extra_cp_src_to_dst_height_uv_loop
    mov             r8, r2
    mov             r9, r3
    add             r0, r8, r6
    add             r11, r9, r7

    mov             r12, r10

extra_cp_src_to_dst_width_uv_loop
    vld1.8          {d0}, [r8]!
    vld1.8          {d1}, [r0]!

    subs            r12, r12, #8

    vst1.8          {d0}, [r9]!
    vst1.8          {d1}, [r11]!
    bne             extra_cp_src_to_dst_width_uv_loop

    subs            lr, lr, #1

    add             r2, r2, r6, lsl #1
    add             r3, r3, r7, lsl #1

    bne             extra_cp_src_to_dst_height_uv_loop

    b               end_of_cp_src_to_dst_uv

    ENDP
    END