@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_mem_fns_neon.s
@ *
@ * @brief
@ *  Contains function definitions for memory manipulation
@ *
@ * @author
@ *  Naveen SR
@ *
@ * @par List of Functions:
@ *  - ihevc_memcpy_mul_8()
@ *  - ihevc_memcpy()
@ *  - ihevc_memset_mul_8()
@ *  - ihevc_memset()
@ *  - ihevc_memset_16bit_mul_8()
@ *  - ihevc_memset_16bit()
@ *
@ * @remarks
@ *  None
@ *
@ *******************************************************************************
@*/

@/**
@*******************************************************************************
@*
@* @brief
@*   memcpy of a 1d array
@*
@* @par Description:
@*   Copies 8-bit data from source to destination when the number of bytes
@*   is 8, 16 or 32
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*/
@void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 *pu1_src,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Precondition: num_bytes is a non-zero multiple of 8. The loop copies 8 bytes
@ before it tests the count and only exits when the count reaches exactly 0.
@ Modifies: r0, r1, r2, d0, flags.

.text
.p2align 2

    .global ihevc_memcpy_mul_8_a9q
.type ihevc_memcpy_mul_8_a9q, %function

ihevc_memcpy_mul_8_a9q:

LOOP_NEON_MEMCPY_MUL_8:
    @ Memcpy 8 bytes
    VLD1.8      {d0},[r1]!
    VST1.8      {d0},[r0]!

    SUBS        r2,r2,#8                    @ 8 fewer bytes remaining
    BNE         LOOP_NEON_MEMCPY_MUL_8
    BX          LR                          @ same return form as the other routines



@*******************************************************************************
@*/
@void ihevc_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes
@
@ Copies num_bytes bytes: 8 at a time through d0 while at least 8 remain,
@ then the remaining 0..7 bytes one at a time through r3.
@ num_bytes == 0 copies nothing.
@ Modifies: r0, r1, r2, r3, d0, flags.

    .global ihevc_memcpy_a9q
.type ihevc_memcpy_a9q, %function

ihevc_memcpy_a9q:
    SUBS        r2,r2,#8                    @ r2 = num_bytes - 8
    BLT         ARM_MEMCPY                  @ fewer than 8 bytes: scalar tail only
LOOP_NEON_MEMCPY:
    @ Memcpy 8 bytes
    VLD1.8      {d0},[r1]!
    VST1.8      {d0},[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMCPY            @ another full 8 bytes still pending

ARM_MEMCPY:
    @ r2 is (bytes left - 8) here, so adding 8 gives the tail count 0..7.
    @ Return when it is zero; this also covers a zero-length request, which
    @ reaches this label straight from the entry check.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMCPY:
    LDRB        r3,[r1],#1
    STRB        r3,[r0],#1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMCPY
    BX          LR




@void ihevc_memset_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 value,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Precondition: num_bytes is a non-zero multiple of 8 (see assumption below).
@ The loop stores before it tests the count and only exits on exactly 0.
@ Modifies: r0, r2, d0, flags.

.text
.p2align 2

    .global ihevc_memset_mul_8_a9q
.type ihevc_memset_mul_8_a9q, %function

ihevc_memset_mul_8_a9q:

@ Assumptions: numbytes is either 8, 16 or 32
    VDUP.8      d0,r1                       @ low byte of r1 in all 8 lanes
LOOP_MEMSET_MUL_8:
    @ Memset 8 bytes
    VST1.8      {d0},[r0]!

    SUBS        r2,r2,#8
    BNE         LOOP_MEMSET_MUL_8

    BX          LR




@void ihevc_memset(UWORD8 *pu1_dst,
@                  UWORD8 value,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes
@
@ Fills num_bytes bytes with the low byte of r1: 8 at a time through d0 while
@ at least 8 remain, then the remaining 0..7 bytes one at a time.
@ num_bytes == 0 writes nothing.
@ Modifies: r0, r2, d0, flags.

    .global ihevc_memset_a9q
.type ihevc_memset_a9q, %function

ihevc_memset_a9q:
    SUBS        r2,r2,#8                    @ r2 = num_bytes - 8
    BLT         ARM_MEMSET                  @ fewer than 8 bytes: scalar tail only
    VDUP.8      d0,r1                       @ low byte of r1 in all 8 lanes
LOOP_NEON_MEMSET:
    @ Memset 8 bytes
    VST1.8      {d0},[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET            @ another full 8 bytes still pending

ARM_MEMSET:
    @ r2 is (bytes left - 8) here, so adding 8 gives the tail count 0..7.
    @ Return when it is zero; this also covers a zero-length request.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET:
    STRB        r1,[r0],#1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET
    BX          LR




@void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                              UWORD16 value,
@                              UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Precondition: num_words is a non-zero multiple of 8 (see assumption below).
@ The loop stores before it tests the count and only exits on exactly 0.
@ Modifies: r0, r2, d0, flags.

.text
.p2align 2

    .global ihevc_memset_16bit_mul_8_a9q
.type ihevc_memset_16bit_mul_8_a9q, %function

ihevc_memset_16bit_mul_8_a9q:

@ Assumptions: num_words is either 8, 16 or 32

    VDUP.16     d0,r1                       @ low halfword of r1 in all 4 lanes
LOOP_MEMSET_16BIT_MUL_8:
    @ Memset 8 words: two stores of 4 halfwords each
    VST1.16     {d0},[r0]!
    VST1.16     {d0},[r0]!

    SUBS        r2,r2,#8
    BNE         LOOP_MEMSET_16BIT_MUL_8

    BX          LR




@void ihevc_memset_16bit(UWORD16 *pu2_dst,
@                        UWORD16 value,
@                        UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words
@
@ Fills num_words 16-bit words with the low halfword of r1: 8 words at a time
@ through d0 while at least 8 remain, then the remaining 0..7 words one at a
@ time. num_words == 0 writes nothing.
@ Modifies: r0, r2, d0, flags.

    .global ihevc_memset_16bit_a9q
.type ihevc_memset_16bit_a9q, %function

ihevc_memset_16bit_a9q:
    SUBS        r2,r2,#8                    @ r2 = num_words - 8
    BLT         ARM_MEMSET_16BIT            @ fewer than 8 words: scalar tail only
    VDUP.16     d0,r1                       @ low halfword of r1 in all 4 lanes
LOOP_NEON_MEMSET_16BIT:
    @ Memset 8 words: two stores of 4 halfwords each
    VST1.16     {d0},[r0]!
    VST1.16     {d0},[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET_16BIT      @ another full 8 words still pending

ARM_MEMSET_16BIT:
    @ r2 is (words left - 8) here, so adding 8 gives the tail count 0..7.
    @ Return when it is zero; this also covers a zero-length request.
    ADDS        r2,r2,#8
    BXEQ        LR

LOOP_ARM_MEMSET_16BIT:
    STRH        r1,[r0],#2
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET_16BIT
    BX          LR




    .section .note.GNU-stack,"",%progbits
