;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


    EXPORT  |vp8_yv12_copy_src_frame_func_neon|
    ARM
    REQUIRE8
    PRESERVE8

    INCLUDE vpx_scale_asm_offsets.asm

    AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: This function is used to copy source data in src_buffer[i] at beginning
;of the encoding. The buffer has a width and height of cpi->oxcf.Width and
;cpi->oxcf.Height, which can be ANY numbers(NOT always multiples of 16 or 4).

;void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
;                                       YV12_BUFFER_CONFIG *dst_ybc);
;
; Copies the Y, U and V planes of src_ybc into dst_ybc. Plane width and
; height are read from src_ybc; each buffer's own stride is used to step
; between rows. Rows are copied two at a time, followed by one extra row
; when the height is odd. Within a row the data is moved in 128-byte
; (Y) or 64-byte (U/V) NEON chunks, then 8-byte chunks, then single bytes.
; Every chunk loop is guarded, so exactly "width" bytes are copied per row
; for any width (including 0), and heights of 0 or 1 are handled.
;
; In:    r0 = src_ybc, r1 = dst_ybc (neither register is modified)
; Regs:  r2/r3   = src/dst pointer, first row of the current pair
;        r10/r11 = src/dst pointer, second row of the current pair
;        r4 = plane height, r5 = plane width (bytes per row)
;        r6/r7   = 2 * stride - width for src/dst (advance to next row pair)
;        r8 = byte temporary, r9 = U/V plane counter
;        r12 = bytes left in the current row, lr = row pairs left
; Saved: r4-r11, lr and d8-d15 are pushed on entry and restored on exit.
; Clobb: r2, r3, r12, q0-q3, q8-q15, flags.

|vp8_yv12_copy_src_frame_func_neon| PROC
    push            {r4 - r11, lr}
    vpush           {d8 - d15}

    ;Copy Y plane
    ldr             r4, [r0, #yv12_buffer_config_y_height]
    ldr             r5, [r0, #yv12_buffer_config_y_width]
    ldr             r6, [r0, #yv12_buffer_config_y_stride]
    ldr             r7, [r1, #yv12_buffer_config_y_stride]
    ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    ; copy two rows at one time
    movs            lr, r4, lsr #1          ;lr = number of row pairs
    beq             cp_height_odd_row       ;height < 2: no pair to copy

cp_src_to_dst_height_loop
    mov             r12, r5

    ; only enter the 128-byte loop when a full chunk is available,
    ; otherwise r12 would wrap below zero and the loop would run away
    cmp             r12, #128
    blo             cp_width_8_check

cp_width_128_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q12, q13}, [r10]!
    vld1.8          {q10, q11}, [r2]!
    vld1.8          {q14, q15}, [r10]!
    sub             r12, r12, #128
    cmp             r12, #128
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q12, q13}, [r11]!
    vst1.8          {q10, q11}, [r3]!
    vst1.8          {q14, q15}, [r11]!
    bhs             cp_width_128_loop

cp_width_8_check
    ; r12 < 128 here; skip the 8-byte loop when fewer than 8 bytes remain
    cmp             r12, #8
    blo             cp_width_1_check

cp_width_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_8_loop

cp_width_1_check
    ; r12 < 8 here
    cmp             r12, #0
    beq             cp_width_done

cp_width_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_1_loop         ;flags still from subs above

cp_width_done
    subs            lr, lr, #1
    add             r2, r2, r6              ;step all four pointers
    add             r3, r3, r7              ;to the next row pair
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_loop

cp_height_odd_row
;copy last line for Y if y_height is odd
    tst             r4, #1
    beq             cp_width_done_1
    mov             r12, r5

    cmp             r12, #128
    blo             cp_width_8_check_1

cp_width_128_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q8, q9}, [r2]!
    vld1.8          {q10, q11}, [r2]!
    sub             r12, r12, #128
    cmp             r12, #128
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q8, q9}, [r3]!
    vst1.8          {q10, q11}, [r3]!
    bhs             cp_width_128_loop_1

cp_width_8_check_1
    cmp             r12, #8
    blo             cp_width_1_check_1

cp_width_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_8_loop_1

cp_width_1_check_1
    cmp             r12, #0
    beq             cp_width_done_1

cp_width_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_1_loop_1
cp_width_done_1

;Copy U & V planes
    ldr             r4, [r0, #yv12_buffer_config_uv_height]
    ldr             r5, [r0, #yv12_buffer_config_uv_width]
    ldr             r6, [r0, #yv12_buffer_config_uv_stride]
    ldr             r7, [r1, #yv12_buffer_config_uv_stride]
    ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
    ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1

    add             r10, r2, r6             ;second row src
    add             r11, r3, r7             ;second row dst
    mov             r6, r6, lsl #1
    mov             r7, r7, lsl #1
    sub             r6, r6, r5              ;adjust stride
    sub             r7, r7, r5

    mov             r9, #2                  ;two passes: U, then V

cp_uv_loop
    ;copy two rows at one time
    movs            lr, r4, lsr #1          ;lr = number of row pairs
    beq             cp_height_uv_odd_row    ;height < 2: no pair to copy

cp_src_to_dst_height_uv_loop
    mov             r12, r5

    ; only enter the 64-byte loop when a full chunk is available
    cmp             r12, #64
    blo             cp_width_uv_8_check

cp_width_uv_64_loop
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q4, q5}, [r10]!
    vld1.8          {q2, q3}, [r2]!
    vld1.8          {q6, q7}, [r10]!
    sub             r12, r12, #64
    cmp             r12, #64
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q4, q5}, [r11]!
    vst1.8          {q2, q3}, [r3]!
    vst1.8          {q6, q7}, [r11]!
    bhs             cp_width_uv_64_loop

cp_width_uv_8_check
    cmp             r12, #8
    blo             cp_width_uv_1_check

cp_width_uv_8_loop
    vld1.8          {d0}, [r2]!
    vld1.8          {d1}, [r10]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    vst1.8          {d1}, [r11]!
    bhs             cp_width_uv_8_loop

cp_width_uv_1_check
    cmp             r12, #0
    beq             cp_width_uv_done

cp_width_uv_1_loop
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    ldrb            r8, [r10], #1
    strb            r8, [r11], #1
    bne             cp_width_uv_1_loop

cp_width_uv_done
    subs            lr, lr, #1
    add             r2, r2, r6
    add             r3, r3, r7
    add             r10, r10, r6
    add             r11, r11, r7
    bne             cp_src_to_dst_height_uv_loop

cp_height_uv_odd_row
;copy last line for U & V if uv_height is odd
    tst             r4, #1
    beq             cp_width_uv_done_1
    mov             r12, r5

    cmp             r12, #64
    blo             cp_width_uv_8_check_1

cp_width_uv_64_loop_1
    vld1.8          {q0, q1}, [r2]!
    vld1.8          {q2, q3}, [r2]!
    sub             r12, r12, #64
    cmp             r12, #64
    vst1.8          {q0, q1}, [r3]!
    vst1.8          {q2, q3}, [r3]!
    bhs             cp_width_uv_64_loop_1

cp_width_uv_8_check_1
    cmp             r12, #8
    blo             cp_width_uv_1_check_1

cp_width_uv_8_loop_1
    vld1.8          {d0}, [r2]!
    sub             r12, r12, #8
    cmp             r12, #8
    vst1.8          {d0}, [r3]!
    bhs             cp_width_uv_8_loop_1

cp_width_uv_1_check_1
    cmp             r12, #0
    beq             cp_width_uv_done_1

cp_width_uv_1_loop_1
    ldrb            r8, [r2], #1
    subs            r12, r12, #1
    strb            r8, [r3], #1
    bne             cp_width_uv_1_loop_1
cp_width_uv_done_1

    ; After U, reload the pointers for V. r4-r7 (height, width, adjusted
    ; strides) are reused as-is. The flags set by subs stay valid through
    ; the conditional loads/adds, which do not update them.
    subs            r9, r9, #1
    ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
    ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
    ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
    ldrne           r11, [r1, #yv12_buffer_config_uv_stride]

    addne           r10, r2, r10                ;second row src
    addne           r11, r3, r11                ;second row dst

    bne             cp_uv_loop

    vpop            {d8 - d15}
    pop             {r4 - r11, pc}

    ENDP
    END