1//****************************************************************************** 2//* 3//* Copyright (C) 2015 The Android Open Source Project 4//* 5//* Licensed under the Apache License, Version 2.0 (the "License"); 6//* you may not use this file except in compliance with the License. 7//* You may obtain a copy of the License at: 8//* 9//* http://www.apache.org/licenses/LICENSE-2.0 10//* 11//* Unless required by applicable law or agreed to in writing, software 12//* distributed under the License is distributed on an "AS IS" BASIS, 13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14//* See the License for the specific language governing permissions and 15//* limitations under the License. 16//* 17//***************************************************************************** 18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19//*/ 20///** 21// ******************************************************************************* 22// * @file 23// * ih264_mem_fns_neon.s 24// * 25// * @brief 26// * Contains function definitions for memory manipulation 27// * 28// * @author 29// * Naveen SR 30// * 31// * @par List of Functions: 32// * - ih264_memcpy_av8() 33// * - ih264_memcpy_mul_8_av8() 34// * - ih264_memset_mul_8_av8() 35// * - ih264_memset_16bit_mul_8_av8() 36// * - ih264_memset_16bit_av8() 37// * 38// * @remarks 39// * None 40// * 41// ******************************************************************************* 42//*/ 43 44.text 45.p2align 2 46.include "ih264_neon_macros.s" 47///** 48//******************************************************************************* 49//* 50//* @brief 51//* memcpy of a 1d array 52//* 53//* @par Description: 54//* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes 55//* 56//* @param[in] pu1_dst 57//* UWORD8 pointer to the destination 58//* 59//* @param[in] pu1_src 60//* UWORD8 pointer to the source 61//* 62//* @param[in] num_bytes 63//* number 
//*  of bytes to copy
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    w2 => num_bytes
//
// Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
// num_bytes is expected to be a non-zero multiple of 8 (the description
// above lists 8, 16 or 32).  A zero count returns without touching memory;
// a count that is not a multiple of 8 is rounded up to the next multiple
// instead of looping without bound.
// Clobbers: x0, x1, w2, v0, condition flags.

    .global ih264_memcpy_mul_8_av8

ih264_memcpy_mul_8_av8:
    cbz     w2, end_memcpy_mul_8            // zero length: nothing to copy

loop_neon_memcpy_mul_8:
    // Memcpy 8 bytes
    ld1     {v0.8b}, [x1], #8
    st1     {v0.8b}, [x0], #8

    subs    w2, w2, #8
    b.hi    loop_neon_memcpy_mul_8          // unsigned: more than 8 were left

end_memcpy_mul_8:
    ret


//*******************************************************************************
//*/
//void ih264_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    x1 => *pu1_src
//    w2 => num_bytes
//
// Copies num_bytes bytes from pu1_src to pu1_dst for any length, including
// zero: whole 8-byte chunks go through v0, the remaining 0..7 bytes are
// copied one at a time through w3.
// Clobbers: x0, x1, w2, w3, v0, condition flags.

    .global ih264_memcpy_av8

ih264_memcpy_av8:
    subs    w2, w2, #8                      // w2 = num_bytes - 8
    b.lo    arm_memcpy                      // borrow: fewer than 8 bytes in total

loop_neon_memcpy:
    // Memcpy 8 bytes
    ld1     {v0.8b}, [x1], #8
    st1     {v0.8b}, [x0], #8

    subs    w2, w2, #8
    b.hs    loop_neon_memcpy                // no borrow: another full chunk remains

arm_memcpy:
    // w2 holds (bytes left - 8) on both paths into this label.
    adds    w2, w2, #8                      // w2 = bytes left (0..7)
    b.eq    end_func1                       // no tail; this also covers num_bytes == 0

loop_arm_memcpy:
    ldrb    w3, [x1], #1
    strb    w3, [x0], #1
    subs    w2, w2, #1
    b.ne    loop_arm_memcpy

end_func1:
    ret


//void ih264_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    w1 => value
//    w2 => num_bytes
//
// Stores the low byte of value into num_bytes bytes at pu1_dst, 8 bytes per
// iteration.  A zero count returns without touching memory; a count that is
// not a multiple of 8 is rounded up to the next multiple instead of looping
// without bound.
// Clobbers: x0, w2, v0, condition flags.

    .global ih264_memset_mul_8_av8

ih264_memset_mul_8_av8:

// Assumptions: numbytes is either 8, 16 or 32
    cbz     w2, end_memset_mul_8            // zero length: nothing to store
    dup     v0.8b, w1                       // replicate the fill byte into 8 lanes

loop_memset_mul_8:
    // Memset 8 bytes
    st1     {v0.8b}, [x0], #8

    subs    w2, w2, #8
    b.hi    loop_memset_mul_8               // unsigned: more than 8 were left

end_memset_mul_8:
    ret


//void ih264_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD32 num_bytes)
//**************Variables Vs Registers*************************
//    x0 => *pu1_dst
//    w1 => value
//    w2 => num_bytes
//
// Stores the low byte of value into num_bytes bytes at pu1_dst for any
// length, including zero: whole 8-byte chunks are written from v0, the
// remaining 0..7 bytes are written one at a time from w1.
// Clobbers: x0, w2, v0, condition flags.

    .global ih264_memset_av8

ih264_memset_av8:
    subs    w2, w2, #8                      // w2 = num_bytes - 8
    b.lo    arm_memset                      // borrow: fewer than 8 bytes in total
    dup     v0.8b, w1                       // replicate the fill byte into 8 lanes

loop_neon_memset:
    // Memset 8 bytes
    st1     {v0.8b}, [x0], #8

    subs    w2, w2, #8
    b.hs    loop_neon_memset                // no borrow: another full chunk remains

arm_memset:
    // w2 holds (bytes left - 8) on both paths into this label.
    adds    w2, w2, #8                      // w2 = bytes left (0..7)
    b.eq    end_func2                       // no tail; this also covers num_bytes == 0

loop_arm_memset:
    strb    w1, [x0], #1
    subs    w2, w2, #1
    b.ne    loop_arm_memset

end_func2:
    ret


//void ih264_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD32 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    w1 => value
//    w2 => num_words
//
// Stores the low 16 bits of value into num_words halfwords at pu2_dst,
// 8 halfwords (16 bytes) per iteration.  A zero count returns without
// touching memory; a count that is not a multiple of 8 is rounded up to the
// next multiple instead of looping without bound.
// Clobbers: x0, w2, v0, condition flags.

    .global ih264_memset_16bit_mul_8_av8

ih264_memset_16bit_mul_8_av8:

// Assumptions: num_words is either 8, 16 or 32
    cbz     w2, end_memset_16bit_mul_8      // zero length: nothing to store
    dup     v0.8h, w1                       // replicate the fill value into 8 lanes

loop_memset_16bit_mul_8:
    // Memset 8 words
    st1     {v0.8h}, [x0], #16

    subs    w2, w2, #8
    b.hi    loop_memset_16bit_mul_8         // unsigned: more than 8 were left

end_memset_16bit_mul_8:
    ret


//void ih264_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD32 num_words)
//**************Variables Vs Registers*************************
//    x0 => *pu2_dst
//    w1 => value
//    w2 => num_words
//
// Stores the low 16 bits of value into num_words halfwords at pu2_dst for
// any count, including zero: groups of 8 halfwords are written from v0, the
// remaining 0..7 halfwords are written one at a time from w1.
// Clobbers: x0, w2, v0, condition flags.

    .global ih264_memset_16bit_av8

ih264_memset_16bit_av8:
    subs    w2, w2, #8                      // w2 = num_words - 8
    b.lo    arm_memset_16bit                // borrow: fewer than 8 halfwords in total
    dup     v0.8h, w1                       // replicate the fill value into 8 lanes

loop_neon_memset_16bit:
    // Memset 8 words
    st1     {v0.8h}, [x0], #16

    subs    w2, w2, #8
    b.hs    loop_neon_memset_16bit          // no borrow: another full group remains

arm_memset_16bit:
    // w2 holds (halfwords left - 8) on both paths into this label.
    adds    w2, w2, #8                      // w2 = halfwords left (0..7)
    b.eq    end_func3                       // no tail; this also covers num_words == 0

loop_arm_memset_16bit:
    strh    w1, [x0], #2
    subs    w2, w2, #1
    b.ne    loop_arm_memset_16bit

end_func3:
    ret