@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/**
@******************************************************************************
@*
@* @brief :Evaluate the best intra 16x16 mode (among VERT, HORZ and DC)
@*                and do the prediction.
@*
@* @par Description
@*   This function evaluates the first three 16x16 modes, computes the
@*   corresponding SADs and returns the buffer predicted with the best mode.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] pu1_ngbr_pels_i16
@*  UWORD8 pointer to neighbouring pels
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] u4_n_avblty
@* availability of neighbouring pixels
@*
@* @param[out] u4_intra_mode
@* Pointer to the variable in which best mode is returned
@*
@* @param[out] pu4_sadmin
@* Pointer to the variable in which minimum sad is returned
@*
@* @param[in] u4_valid_intra_modes
@* Says which modes are valid
@*
@*
@* @return      none
@*
@******************************************************************************
@*/
@
@void ih264e_evaluate_intra16x16_modes(UWORD8 *pu1_src,
@                                      UWORD8 *pu1_ngbr_pels_i16,
@                                      UWORD8 *pu1_dst,
@                                      UWORD32 src_strd,
@                                      UWORD32 dst_strd,
@                                      WORD32 u4_n_avblty,
@                                      UWORD32 *u4_intra_mode,
@                                      WORD32 *pu4_sadmin,
@                                      UWORD32 u4_valid_intra_modes)
@
.text
.p2align 2

    .global ih264e_evaluate_intra16x16_modes_a9q
    .type   ih264e_evaluate_intra16x16_modes_a9q, %function   @ ELF: mark as code so the linker can interwork

@------------------------------------------------------------------------------
@ ih264e_evaluate_intra16x16_modes_a9q
@
@ Target : 32-bit ARM with NEON, GNU as syntax ('@' starts a comment).
@
@ Computes the SAD of a 16x16 source block against the VERTICAL, HORIZONTAL
@ and DC predictions, stores the smallest SAD and the winning mode number
@ (0 = VERT, 1 = HORZ, 2 = DC) through the two output pointers and writes the
@ winning 16x16 prediction to pu1_dst.
@
@ Arguments on entry:
@   r0      = pu1_src
@   r1      = pu1_ngbr_pels_i16
@   r2      = pu1_dst
@   r3      = src_strd
@   [sp+0]  = dst_strd              (loaded into r4 later)
@   [sp+4]  = u4_n_avblty           (loaded into r5; bit0 = left, bit2 = top)
@   [sp+8]  = u4_intra_mode         (loaded into r6; output pointer)
@   [sp+12] = pu4_sadmin            (loaded into r7; output pointer)
@   [sp+16] = u4_valid_intra_modes  (bit0 = VERT, bit1 = HORZ, bit2 = DC)
@
@ Neighbour buffer as this code reads it:
@   bytes  0..15 : left column, read from byte 15 (row 0) down to byte 0
@                  (row 15)
@   byte   16    : skipped (presumably the top-left pel)
@   bytes 17..32 : top row
@
@ NOTE(review): the DC sum below always adds all 32 neighbour bytes and only
@ the rounding term / shift depend on u4_n_avblty, so the result looks correct
@ only if the caller zero-fills unavailable neighbours -- confirm against the
@ caller.
@
@ Saves and restores r4-r12, lr and d8-d15.
@------------------------------------------------------------------------------

@ Offsets of the stack arguments relative to sp.
@ After "stmfd sp!, {r4-r12, r14}" (10 words = 40 bytes):
    .equ    I16_STK_N_AVBLTY_GPR_FRAME, 44
@ After the additional "vpush {d8-d15}" (64 more bytes, 104 in total):
    .equ    I16_STK_DST_STRD,           104
    .equ    I16_STK_INTRA_MODE,         112
    .equ    I16_STK_SADMIN,             116
    .equ    I16_STK_VALID_MODES,        120

ih264e_evaluate_intra16x16_modes_a9q:

    stmfd         sp!, {r4-r12, r14}    @ save core registers and return address
    ldr           r5, [sp, #I16_STK_N_AVBLTY_GPR_FRAME] @ r5 = u4_n_avblty


    vpush         {d8-d15}              @ q4-q7 are used below
    vld1.32       {q4}, [r1]!           @ q4 = neighbour bytes 0..15 (left column), r1 += 16
    sub           r6, r1, #1            @ r6 -> neighbour byte 15 = left pel of row 0
    add           r1, r1, #1            @ step over neighbour byte 16
    mov           r10, #0               @ r10 = 1 if left is available
    vld1.32       {q5}, [r1]!           @ q5 = neighbour bytes 17..32 (top row)
    mov           r11, #0               @ r11 = 1 if top is available
    mov           r4, #0                @ r4  = 128 if nothing is available

    @ Left available?
    tst           r5, #1
    movne         r10, #1

    @ Top available?
    tst           r5, #4
    lsl           r9, r10, #3           @ 8 if left available (does not touch flags)
    movne         r11, #1
    lsl           r12, r11, #3          @ 8 if top available
    adds          r8, r9, r12           @ r8 = rounding term: 0, 8 or 16

    @ Neither available: DC falls back to 128
    moveq         r4, #128

    @ ---- DC value ----------------------------------------------------------
    vaddl.u8      q15, d8, d9           @ widen and add the two halves of left

    vaddl.u8      q14, d10, d11         @ widen and add the two halves of top

    vadd.u16      q15, q14, q15         @ left + top, 8 x u16
    vadd.u16      d30, d31, d30         @ 4 x u16
    vpadd.u16     d30, d30              @ pairwise add
    vpadd.u16     d30, d30              @ lane 0 = sum of all 32 neighbour bytes

    vmov.u16      r7, d30[0]
    add           r7, r7, r8            @ + rounding term
    add           r11, r11, #3
    add           r8, r10, r11          @ shift = 3 + left + top

    lsr           r7, r8
    add           r7, r4, r7            @ + 128 when nothing is available
    vld1.32       {q0}, [r0], r3        @ q0 = source row 0
    vdup.8        q15, r7               @ q15 = DC prediction row

    @ ---- SADs for all three modes, row 0 -------------------------------------
    @ Accumulators (8 x u16 each): q8/q9 = VERT, q13/q14 = HORZ, q11/q12 = DC
    ldrb          r7, [r6]
    vdup.8        q10, r7               @ q10 = HORZ prediction for row 0
    vabdl.u8      q8, d0, d10           @ VERT row 0
    vabdl.u8      q9, d1, d11
    sub           r6, r6, #1            @ r6 -> left pel of the next row
    vabdl.u8      q13, d0, d20          @ HORZ row 0
    vabdl.u8      q14, d1, d21
    mov           r1, #15               @ 15 rows remain
    vabdl.u8      q11, d0, d30          @ DC row 0
    vabdl.u8      q12, d1, d31


    @ ---- rows 1..15 ----------------------------------------------------------
.Li16_sad_row_loop:
    vld1.32       {q1}, [r0], r3        @ q1 = source row i
    vabal.u8      q11, d2, d30          @ DC row i
    ldrb          r7, [r6]              @ left pel of row i
    vabal.u8      q12, d3, d31

    vabal.u8      q8, d2, d10           @ VERT row i
    vdup.8        q10, r7               @ q10 = HORZ prediction for row i
    sub           r6, r6, #1
    vabal.u8      q9, d3, d11

    subs          r1, r1, #1
    vabal.u8      q13, d2, d20          @ HORZ row i
    vabal.u8      q14, d3, d21
    bne           .Li16_sad_row_loop

    @ ---- reduce each accumulator pair to one scalar --------------------------
    @ r8 = VERT SAD, r9 = HORZ SAD, r10 = DC SAD

    vadd.i16      q9, q9, q8            @ VERT
    vadd.i16      d18, d19, d18         @ VERT
    vpaddl.u16    d18, d18              @ VERT
    vadd.i16      q14, q13, q14         @ HORZ
    vadd.i16      d28, d29, d28         @ HORZ
    vpaddl.u32    d18, d18              @ VERT
    vpaddl.u16    d28, d28              @ HORZ

    vpaddl.u32    d28, d28              @ HORZ
    vmov.u32      r8, d18[0]            @ r8 = VERT SAD
    vadd.i16      q12, q11, q12         @ DC
    vmov.u32      r9, d28[0]            @ r9 = HORZ SAD
    mov           r11, #1
    vadd.i16      d24, d24, d25         @ DC
    lsl           r11 , #30             @ r11 = 1 << 30, SAD given to a disallowed mode

    ldr           r0, [sp, #I16_STK_VALID_MODES] @ r0 = u4_valid_intra_modes

    tst           r0, #1                @ VERT allowed?
    moveq         r8, r11
    vpaddl.u16    d24, d24              @ DC

    tst           r0, #2                @ HORZ allowed?
    moveq         r9, r11
    vpaddl.u32    d24, d24              @ DC

    vmov.u32      r10, d24[0]           @ r10 = DC SAD

    ldr           r4, [sp, #I16_STK_DST_STRD] @ r4 = dst_strd
    ldr           r7, [sp, #I16_STK_SADMIN]   @ r7 = pu4_sadmin

    tst           r0, #4                @ DC allowed?
    moveq         r10, r11

    ldr           r6, [sp, #I16_STK_INTRA_MODE] @ r6 = u4_intra_mode

    @ ---- pick the winner: VERT wins ties, then HORZ, then DC ------------------
    cmp           r8, r9
    bgt           .Li16_not_vert
    cmp           r8, r10
    bgt           .Li16_do_dc

    @ ---- VERTICAL ------------------------------------------------------------
    str           r8 , [r7]             @ *pu4_sadmin = VERT SAD
    mov           r8, #0
    str           r8 , [r6]             @ *u4_intra_mode = 0
    vmov          q15, q5               @ the shared store path writes q15: use the top row

    b             .Li16_store_q15_rows

.Li16_not_vert:
    cmp           r9, r10
    bgt           .Li16_do_dc

    @ ---- HORIZONTAL ----------------------------------------------------------
    @ Row n is filled with left-column byte (15 - n), taken from q4 (d9:d8).
    @ The broadcasts run ahead of the stores that consume them.
    vdup.8        q5, d9[7]             @ row 0
    str           r9 , [r7]             @ *pu4_sadmin = HORZ SAD
    vdup.8        q6, d9[6]             @ row 1
    mov           r9, #1
    vdup.8        q7, d9[5]             @ row 2
    vst1.32       {d10, d11} , [r2], r4 @ store row 0
    vdup.8        q8, d9[4]             @ row 3
    str           r9 , [r6]             @ *u4_intra_mode = 1
    vdup.8        q9, d9[3]             @ row 4
    vst1.32       {d12, d13} , [r2], r4 @ store row 1
    vdup.8        q10, d9[2]            @ row 5
    vst1.32       {d14, d15} , [r2], r4 @ store row 2
    vdup.8        q11, d9[1]            @ row 6
    vst1.32       {d16, d17} , [r2], r4 @ store row 3
    vdup.8        q12, d9[0]            @ row 7
    vst1.32       {d18, d19} , [r2], r4 @ store row 4
    vdup.8        q13, d8[7]            @ row 8
    vst1.32       {d20, d21} , [r2], r4 @ store row 5
    vdup.8        q14, d8[6]            @ row 9
    vst1.32       {d22, d23} , [r2], r4 @ store row 6
    vdup.8        q15, d8[5]            @ row 10
    vst1.32       {d24, d25} , [r2], r4 @ store row 7
    vdup.8        q1, d8[4]             @ row 11
    vst1.32       {d26, d27} , [r2], r4 @ store row 8
    vdup.8        q2, d8[3]             @ row 12
    vst1.32       {d28, d29} , [r2], r4 @ store row 9
    vdup.8        q3, d8[2]             @ row 13
    vst1.32       {d30, d31}, [r2], r4  @ store row 10
    vdup.8        q5, d8[1]             @ row 14 (q5 reused, row 0 already stored)
    vst1.32       {d2, d3} , [r2], r4   @ store row 11
    vdup.8        q6, d8[0]             @ row 15 (q6 reused, row 1 already stored)
    vst1.32       {d4, d5} , [r2], r4   @ store row 12

    vst1.32       {d6, d7} , [r2], r4   @ store row 13

    vst1.32       {d10, d11} , [r2], r4 @ store row 14

    vst1.32       {d12, d13} , [r2], r4 @ store row 15
    b             .Li16_end_func

    @ ---- DC ------------------------------------------------------------------
.Li16_do_dc:
    str           r10 , [r7]            @ *pu4_sadmin = DC SAD
    mov           r10, #2
    str           r10 , [r6]            @ *u4_intra_mode = 2

    @ Shared by DC (q15 = DC value) and VERTICAL (q15 = top row):
    @ write the same 16 bytes to all 16 destination rows.
.Li16_store_q15_rows:
    vst1.32       {d30, d31}, [r2], r4  @ row 0
    vst1.32       {d30, d31}, [r2], r4  @ row 1
    vst1.32       {d30, d31}, [r2], r4  @ row 2
    vst1.32       {d30, d31}, [r2], r4  @ row 3
    vst1.32       {d30, d31}, [r2], r4  @ row 4
    vst1.32       {d30, d31}, [r2], r4  @ row 5
    vst1.32       {d30, d31}, [r2], r4  @ row 6
    vst1.32       {d30, d31}, [r2], r4  @ row 7
    vst1.32       {d30, d31}, [r2], r4  @ row 8
    vst1.32       {d30, d31}, [r2], r4  @ row 9
    vst1.32       {d30, d31}, [r2], r4  @ row 10
    vst1.32       {d30, d31}, [r2], r4  @ row 11
    vst1.32       {d30, d31}, [r2], r4  @ row 12
    vst1.32       {d30, d31}, [r2], r4  @ row 13
    vst1.32       {d30, d31}, [r2], r4  @ row 14
    vst1.32       {d30, d31}, [r2], r4  @ row 15

.Li16_end_func:
    vpop          {d8-d15}
    ldmfd         sp!, {r4-r12, pc}     @ restore registers and return

    .size   ih264e_evaluate_intra16x16_modes_a9q, .-ih264e_evaluate_intra16x16_modes_a9q