@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_fmt_conv_420sp_to_420p.s
@*
@* @brief
@*  contains function definitions for format conversions
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  - ihevcd_fmt_conv_420sp_to_420p_a9q()
@*
@* @remarks
@*  Syntax: GNU as, ARM (AArch32) with NEON, '@' line comments.
@*
@*******************************************************************************/

@ Byte offsets of the stack-passed arguments, measured from sp AFTER the
@ prologue has pushed r4-r12 and lr (10 words = 40 bytes).
.equ SAVED_REGS_SIZE,       40
.equ ARG_DEST_V,            (SAVED_REGS_SIZE + 0)
.equ ARG_WIDTH,             (SAVED_REGS_SIZE + 4)
.equ ARG_HEIGHT,            (SAVED_REGS_SIZE + 8)
.equ ARG_SRC_STRIDE_Y,      (SAVED_REGS_SIZE + 12)
.equ ARG_SRC_STRIDE_UV,     (SAVED_REGS_SIZE + 16)
.equ ARG_DST_STRIDE_Y,      (SAVED_REGS_SIZE + 20)
.equ ARG_DST_STRIDE_UV,     (SAVED_REGS_SIZE + 24)
.equ ARG_IS_U_FIRST,        (SAVED_REGS_SIZE + 28)
.equ ARG_DISABLE_LUMA_COPY, (SAVED_REGS_SIZE + 32)

.text

@/*****************************************************************************
@*                                                                            *
@*  Function Name    : ihevcd_fmt_conv_420sp_to_420p_a9q()                    *
@*                                                                            *
@*  Description      : Converts an image from YUV420 semi-planar (Y plane    *
@*                     plus one interleaved UV plane) to YUV420 planar        *
@*                     (separate Y, U and V planes). The luma plane is        *
@*                     copied row by row unless disable_luma_copy is          *
@*                     non-zero; the chroma plane is de-interleaved into      *
@*                     the two destination chroma planes.                     *
@*                                                                            *
@*  Arguments        : R0         pu1_src_y                                   *
@*                     R1         pu1_src_uv                                  *
@*                     R2         pu1_dest_y                                  *
@*                     R3         pu1_dest_u                                  *
@*                     [R13 #40]  pu1_dest_v                                  *
@*                     [R13 #44]  u2_width                                    *
@*                     [R13 #48]  u2_height                                   *
@*                     [R13 #52]  u2_stridey                                  *
@*                     [R13 #56]  u2_strideuv                                 *
@*                     [R13 #60]  u2_dest_stridey                             *
@*                     [R13 #64]  u2_dest_strideuv                            *
@*                     [R13 #68]  is_u_first                                  *
@*                     [R13 #72]  disable_luma_copy                           *
@*                     (stack offsets are relative to sp after the prologue;  *
@*                      every stack argument is read as a 32-bit word and     *
@*                      compared as signed - NOTE(review): confirm the        *
@*                      argument types against the C declaration)             *
@*                                                                            *
@*  Values Returned  : None                                                   *
@*                                                                            *
@*  Register Usage   : R0 - R12 (R4 - R12 and LR are saved and restored),     *
@*                     D0 - D1                                                *
@*                       r0  src Y cursor        r1  src UV cursor            *
@*                       r2  dst Y cursor        r3  dst plane for even bytes *
@*                       r5  dst plane for odd bytes of the UV row            *
@*                       r4  rows remaining      r6  row bytes remaining      *
@*                       r8  width               r9  height, then height/2    *
@*                       r10 src row gap (stride - bytes consumed per row)    *
@*                       r11 dst row gap (stride - bytes produced per row)    *
@*                       r12 scratch byte (narrow-row path only)              *
@*                                                                            *
@*  Stack Usage      : 40 Bytes                                               *
@*                                                                            *
@*  Interruptibility : Interruptible                                          *
@*                                                                            *
@*  Known Limitations                                                         *
@*       Assumptions: Image Width:  Assumed to be multiple of 2 and           *
@*                    Image Height: Assumed to be even.                       *
@*       Rows of 16 bytes or more whose width is not a multiple of 16 are     *
@*       finished with one overlapping 16-byte copy, so the last bytes of     *
@*       such a row are read and written twice.                               *
@*       Rows narrower than 16 bytes are copied byte by byte.                 *
@*       A width or height of zero (or negative) returns without touching     *
@*       any buffer; height/2 == 0 skips the chroma conversion.               *
@*                                                                            *
@*  Revision History :                                                        *
@*         DD MM YYYY   Author(s)       Changes (Describe the changes made)   *
@*         16 05 2012   Naveen SR       draft                                 *
@*                                                                            *
@*****************************************************************************/

.globl ihevcd_fmt_conv_420sp_to_420p_a9q

.type ihevcd_fmt_conv_420sp_to_420p_a9q, %function

ihevcd_fmt_conv_420sp_to_420p_a9q:
    stmfd       sp!, {r4-r12, lr}               @ 10 words -> SAVED_REGS_SIZE

    ldr         r8, [sp, #ARG_WIDTH]            @ r8 = width
    ldr         r9, [sp, #ARG_HEIGHT]           @ r9 = height

    @ The row/column loops below always execute at least once, so an empty
    @ image must be rejected up front to avoid copying a phantom row.
    cmp         r8, #0
    ble         .Lfmt_conv_exit
    cmp         r9, #0
    ble         .Lfmt_conv_exit

    ldr         r7, [sp, #ARG_SRC_STRIDE_Y]
    ldr         r5, [sp, #ARG_DST_STRIDE_Y]
    sub         r10, r7, r8                     @ src Y gap = stride - width
    sub         r11, r5, r8                     @ dst Y gap = stride - width

    ldr         r5, [sp, #ARG_DISABLE_LUMA_COPY]
    cmp         r5, #0                          @ non-zero -> skip the luma copy
    bne         .Luv_copy_start

    @/* Copy Y */
    mov         r4, r9                          @ r4 = luma rows remaining (> 0)
.Ly_row_loop:
    mov         r6, r8                          @ r6 = bytes remaining in row
    cmp         r6, #16
    blt         .Ly_narrow_row                  @ too short for a 16-byte vector

.Ly_col_loop:
    sub         r6, r6, #16
    vld1.8      {d0, d1}, [r0]!
    vst1.8      {d0, d1}, [r2]!
    cmp         r6, #16
    bge         .Ly_col_loop
    cmp         r6, #0
    beq         .Ly_row_end
    @ 1..15 bytes are left. Step both cursors back by (16 - remaining) so
    @ that one more full 16-byte copy ends exactly at the end of the row.
    @ Ex: width 162 -> the loop above handled 160 bytes; both cursors are
    @ moved back to byte 146 and bytes 146..161 are copied.
    rsb         r6, r6, #16
    sub         r0, r0, r6
    sub         r2, r2, r6
    vld1.8      {d0, d1}, [r0]!
    vst1.8      {d0, d1}, [r2]!

.Ly_row_end:
    add         r0, r0, r10                     @ advance to next source row
    add         r2, r2, r11                     @ advance to next destination row
    subs        r4, r4, #1
    bgt         .Ly_row_loop

    @/* Copy UV */
.Luv_copy_start:
    mov         r9, r9, lsr #1                  @ chroma rows = height / 2
    cmp         r9, #0
    beq         .Lfmt_conv_exit                 @ no chroma rows (height == 1)

    ldr         r5, [sp, #ARG_DST_STRIDE_UV]
    ldr         r7, [sp, #ARG_SRC_STRIDE_UV]

    sub         r10, r7, r8                     @ src UV gap: row is width bytes
    mov         r11, r8, lsr #1
    sub         r11, r5, r11                    @ dst gap: row is width/2 bytes

    ldr         r5, [sp, #ARG_DEST_V]

    @ r3 receives the even bytes of each interleaved row, r5 the odd bytes.
    @ When is_u_first is zero the two destination pointers are exchanged.
    ldr         r4, [sp, #ARG_IS_U_FIRST]
    cmp         r4, #0
    moveq       r4, r5
    moveq       r5, r3
    moveq       r3, r4

    mov         r4, r9                          @ r4 = chroma rows remaining (> 0)
.Luv_row_loop:
    mov         r6, r8                          @ r6 = interleaved bytes remaining
    cmp         r6, #16
    blt         .Luv_narrow_row                 @ too short for a 16-byte vector

.Luv_col_loop:
    sub         r6, r6, #16
    pld         [r1, #128]
    vld2.8      {d0, d1}, [r1]!                 @ d0 = even bytes, d1 = odd bytes
    vst1.8      {d0}, [r3]!
    vst1.8      {d1}, [r5]!
    cmp         r6, #16
    bge         .Luv_col_loop
    cmp         r6, #0
    beq         .Luv_row_end
    @ 1..15 interleaved bytes are left. Step the source back by
    @ (16 - remaining) bytes and each destination plane back by half of
    @ that, then redo one full de-interleaving VLD2 / VST1 step that ends
    @ exactly at the end of the row.
    rsb         r6, r6, #16
    sub         r1, r1, r6
    sub         r3, r3, r6, lsr #1
    sub         r5, r5, r6, lsr #1
    vld2.8      {d0, d1}, [r1]!
    vst1.8      {d0}, [r3]!
    vst1.8      {d1}, [r5]!

.Luv_row_end:
    add         r1, r1, r10                     @ advance to next source row
    add         r3, r3, r11                     @ advance both destination planes
    add         r5, r5, r11
    subs        r4, r4, #1
    bgt         .Luv_row_loop

.Lfmt_conv_exit:
    ldmfd       sp!, {r4-r12, pc}

    @ ---- out-of-line slow paths for rows narrower than 16 bytes ----------

    @ Luma: copy r6 (1..15) bytes one at a time, then rejoin the row loop.
.Ly_narrow_row:
    ldrb        r12, [r0], #1
    strb        r12, [r2], #1
    subs        r6, r6, #1
    bgt         .Ly_narrow_row
    b           .Ly_row_end

    @ Chroma: split r6 (< 16, assumed even) interleaved bytes one pair at a
    @ time, then rejoin the row loop.
.Luv_narrow_row:
    ldrb        r12, [r1], #1
    strb        r12, [r3], #1                   @ even byte
    ldrb        r12, [r1], #1
    strb        r12, [r5], #1                   @ odd byte
    subs        r6, r6, #2
    bgt         .Luv_narrow_row
    b           .Luv_row_end

.size ihevcd_fmt_conv_420sp_to_420p_a9q, . - ihevcd_fmt_conv_420sp_to_420p_a9q