1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20@**
21@******************************************************************************
22@* @file
23@*  ih264_intra_pred_luma_8x8_a9q.s
24@*
25@* @brief
26@*  Contains function definitions for intra 8x8 Luma prediction .
27@*
28@* @author
29@*  Ittiam
30@*
31@* @par List of Functions:
32@*
33@*  -ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
34@*  -ih264_intra_pred_luma_8x8_mode_vert_a9q
35@*  -ih264_intra_pred_luma_8x8_mode_horz_a9q
36@*  -ih264_intra_pred_luma_8x8_mode_dc_a9q
37@*  -ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
38@*  -ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
39@*  -ih264_intra_pred_luma_8x8_mode_vert_r_a9q
40@*  -ih264_intra_pred_luma_8x8_mode_horz_d_a9q
41@*  -ih264_intra_pred_luma_8x8_mode_vert_l_a9q
42@*  -ih264_intra_pred_luma_8x8_mode_horz_u_a9q
43@*
44@* @remarks
45@*  None
46@*
47@*******************************************************************************
48@*
49
50@* All the functions here are replicated from ih264_intra_pred_filters.c
51@
52
@ All code and the literal below live in the text section, word aligned.
    .text
    .p2align 2

@ Table loaded by ih264_intra_pred_luma_8x8_mode_horz_u_a9q and used there as
@ the VTBL index operand. It is only declared here; its contents are defined
@ outside this file.
    .hidden ih264_gai1_intrapred_luma_8x8_horz_u
    .extern ih264_gai1_intrapred_luma_8x8_horz_u

@ Position-independent reference to the table: the word holds
@ (table - (scrlb8x8l2 + 8)). The instruction at scrlb8x8l2 is
@ "add r12, r12, pc"; in ARM state pc reads as that instruction's address
@ plus 8, so the sum is the run-time address of the table.
@ NOTE(review): the "- 8" bias is only right when this file is assembled
@ as ARM (not Thumb) code - confirm the build flags.
scratch_intrapred_addr_8x8:
    .word         ih264_gai1_intrapred_luma_8x8_horz_u - scrlb8x8l2 - 8
60
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q
@*
@* @brief
@*  Reference sample filtering process for Intra_8x8 sample prediction
@*  (sec 8.3.2.2.1).
@*
@* @par Description:
@*  Smooths the 25 neighbour samples src[0..24] with a [1 2 1] filter. The two
@*  end samples use the one-sided form:
@*      dst[0]  = (3 * src[0] + src[1] + 2) >> 2
@*      dst[i]  = (src[i - 1] + 2 * src[i] + src[i + 1] + 2) >> 2,  i = 1..23
@*      dst[24] = (src[23] + 3 * src[24] + 2) >> 2
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the 25 unfiltered neighbour samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the 25 filtered samples
@*
@* @returns
@*  None
@*
@* @remarks
@*  Exactly 25 bytes are read from pu1_src and 25 bytes are written to pu1_dst.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_ref_filtering(UWORD8 *pu1_src,
@                                                 UWORD8 *pu1_dst)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src (advanced to &src[24] while loading)
@   r1 => *pu1_dst (post-incremented by the stores)


    .global ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q

ih264_intra_pred_luma_8x8_mode_ref_filtering_a9q:

    vpush         {d8-d15}              @ q4-q7 are written below and are callee-saved

    vld1.u8       {q0}, [r0]!           @ q0 = src[0..15], r0 = &src[16]
    vld1.u8       {d2}, [r0]!           @ d2 = src[16..23], r0 = &src[24]
    vld1.8        {d3[0]}, [r0]         @ d3[0] = src[24]; no byte beyond src[24] is read
                                        @ (d3[1..7] keep stale data, they never reach an output)
    vext.8        q2, q0, q1, #1        @ q2 = src[1..16]
    vext.8        q3, q1, q1, #1        @ d6 = src[17..24]
    vext.8        q4, q2, q3, #1        @ q4 = src[2..17]
    vext.8        q5, q3, q3, #1        @ d10 = src[18..24] plus one stale lane
    vld1.8        {d10[7]}, [r0]        @ d10[7] = src[24] again, so the last tap gives
                                        @ dst[24] = (src[23] + 3 * src[24] + 2) >> 2
    vaddl.u8      q10, d0, d4           @ src[i] + src[i + 1],           i = 0..7
    vaddl.u8      q7, d0, d0            @ 2 * src[i],                    i = 0..7
    vadd.u16      q7, q10, q7           @ lane 0 = 3 * src[0] + src[1]   (one-sided first tap)
    vaddl.u8      q11, d1, d5           @ src[i] + src[i + 1],           i = 8..15
    vqrshrun.s16  d14, q7, #2           @ (x + 2) >> 2, narrowed to bytes
    vaddl.u8      q12, d4, d8           @ src[i + 1] + src[i + 2],       i = 0..7
    vaddl.u8      q13, d5, d9           @ src[i + 1] + src[i + 2],       i = 8..15
    vst1.8        {d14[0]}, [r1]!       @ dst[0]
    vadd.u16      q12, q10, q12         @ src[i] + 2 * src[i + 1] + src[i + 2], i = 0..7
    vadd.u16      q13, q11, q13         @ same for i = 8..15
    vaddl.u8      q9, d2, d6            @ src[i] + src[i + 1],           i = 16..23
    vaddl.u8      q8, d6, d10           @ src[i + 1] + src[i + 2] (src[24] repeated in the last lane)
    vqrshrun.s16  d4, q12, #2
    vqrshrun.s16  d5, q13, #2
    vadd.u16      q6, q8, q9
    vst1.8        {q2}, [r1]!           @ dst[1..16]
    vqrshrun.s16  d6, q6, #2
    vst1.8        {d6}, [r1]            @ dst[17..24]


end_func_ref_filt:

    vpop          {d8-d15}
    bx            lr                    @ leaf routine: no core register was spilled
140
141
142
143
144
145
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_vert_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Vertical (sec 8.3.2.2.2)
@*
@* @par Description:
@*  Copies the eight samples src[9..16] into each of the eight rows of the
@*  destination block.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[9..16] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst (advanced by dst_strd after every row)
@   r3 =>  dst_strd


    .global ih264_intra_pred_luma_8x8_mode_vert_a9q

ih264_intra_pred_luma_8x8_mode_vert_a9q:

    add           r0, r0, #9            @ r0 = &src[9]
    vld1.8        {d0}, [r0]            @ d0 = src[9..16]

    vst1.8        {d0}, [r1], r3        @ row 0
    vst1.8        {d0}, [r1], r3        @ row 1
    vst1.8        {d0}, [r1], r3        @ row 2
    vst1.8        {d0}, [r1], r3        @ row 3
    vst1.8        {d0}, [r1], r3        @ row 4
    vst1.8        {d0}, [r1], r3        @ row 5
    vst1.8        {d0}, [r1], r3        @ row 6
    vst1.8        {d0}, [r1], r3        @ row 7

    bx            lr                    @ only r0, r1 and d0 were written: nothing to restore
211
212
213
214
215
216@******************************************************************************
217
218
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_horz_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Horizontal (sec 8.3.2.2.2)
@*
@* @par Description:
@*  Row y of the destination block is filled with the single sample src[7 - y],
@*  i.e. src[7] goes to row 0 and src[0] to row 7.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[0..7] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst (advanced by dst_strd after every row)
@   r3 =>  dst_strd


    .global ih264_intra_pred_luma_8x8_mode_horz_a9q

ih264_intra_pred_luma_8x8_mode_horz_a9q:

    vld1.u8       {d0}, [r0]            @ d0 = src[0..7]

    @ d1 and d2 alternate so that each broadcast is issued one row ahead of
    @ the store that consumes it.
    vdup.8        d1, d0[7]
    vdup.8        d2, d0[6]
    vst1.8        {d1}, [r1], r3        @ row 0 = src[7]
    vdup.8        d1, d0[5]
    vst1.8        {d2}, [r1], r3        @ row 1 = src[6]
    vdup.8        d2, d0[4]
    vst1.8        {d1}, [r1], r3        @ row 2 = src[5]
    vdup.8        d1, d0[3]
    vst1.8        {d2}, [r1], r3        @ row 3 = src[4]
    vdup.8        d2, d0[2]
    vst1.8        {d1}, [r1], r3        @ row 4 = src[3]
    vdup.8        d1, d0[1]
    vst1.8        {d2}, [r1], r3        @ row 5 = src[2]
    vdup.8        d2, d0[0]
    vst1.8        {d1}, [r1], r3        @ row 6 = src[1]
    vst1.8        {d2}, [r1], r3        @ row 7 = src[0]

    bx            lr                    @ leaf routine, nothing was spilled
291
292
293
294
295
296@******************************************************************************
297
298
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_dc_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: DC (sec 8.3.2.2.3)
@*
@* @par Description:
@*  Fills the whole 8x8 block with one value chosen from the availability
@*  flags (bit 0 = left, bit 2 = top):
@*      left and top : (sum(src[0..7]) + sum(src[9..16]) + 8) >> 4
@*      top only     : (sum(src[9..16]) + 4) >> 3
@*      left only    : (sum(src[0..7]) + 4) >> 3
@*      neither      : 128
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (the incoming value is not used; r2 is scratch)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels, passed on the stack
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
@                                       UWORD8 *pu1_dst,
@                                       WORD32 src_strd,
@                                       WORD32 dst_strd,
@                                       WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst (advanced by dst_strd after every row)
@   r2 =>  scratch for the flag tests
@   r3 =>  dst_strd
@   r4 =>  ui_neighboravailability (loaded from the stack)


    .global ih264_intra_pred_luma_8x8_mode_dc_a9q

ih264_intra_pred_luma_8x8_mode_dc_a9q:

    push          {r4, lr}
    ldr           r4, [sp, #8]          @ fifth argument sits just above the two words pushed here

    ands          r2, r4, #0x01         @ left neighbours present?
    beq           .Ldc_left_absent
    ands          r2, r4, #0x04         @ top neighbours present?
    beq           .Ldc_left_only

    @ Left and top present: average of all sixteen samples.
    vld1.u8       {d0}, [r0]            @ d0 = src[0..7]
    add           r0, r0, #9
    vld1.u8       {d1}, [r0]            @ d1 = src[9..16]
    vpaddl.u8     q0, q0                @ pairwise byte sums, four halfwords per half
    vadd.u16      d0, d0, d1            @ fold the two halves together
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0                @ low lane of d0 now holds the full sum
    vqrshrun.s16  d0, q0, #4            @ (sum + 8) >> 4 in byte 0
    vdup.u8       d0, d0[0]
    b             .Ldc_store

.Ldc_left_absent:                       @ left missing: top decides between "top only" and "none"
    ands          r2, r4, #0x04
    beq           .Ldc_none

    add           r0, r0, #9
    vld1.u8       {d0}, [r0]            @ d0 = src[9..16]
    vpaddl.u8     d0, d0
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0
    vqrshrun.s16  d0, q0, #3            @ (sum + 4) >> 3 in byte 0; only that byte is used
    vdup.u8       d0, d0[0]
    b             .Ldc_store

.Ldc_left_only:
    vld1.u8       {d0}, [r0]            @ d0 = src[0..7]
    vpaddl.u8     d0, d0
    vpaddl.u16    d0, d0
    vpaddl.u32    d0, d0
    vqrshrun.s16  d0, q0, #3            @ (sum + 4) >> 3 in byte 0; only that byte is used
    vdup.u8       d0, d0[0]
    b             .Ldc_store

.Ldc_none:                              @ no neighbour at all: fixed value
    vmov.u8       q0, #128

.Ldc_store:                             @ d0 holds the DC byte in all eight lanes
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3
    vst1.8        {d0}, [r1], r3

    pop           {r4, pc}
404
405
406
407
408
409
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_diag_dl_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Left
@*  (sec 8.3.2.2.4)
@*
@* @par Description:
@*  With T[i] = src[9 + i], i = 0..15, and
@*      F[i]  = (T[i] + 2 * T[i + 1] + T[i + 2] + 2) >> 2,  i = 0..13
@*      F[14] = (T[14] + 3 * T[15] + 2) >> 2
@*  the block is dst[y][x] = F[x + y].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[9..24] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src (moved to &src[9])
@   r1 => *pu1_dst
@   r3 =>  dst_strd
@   r5 =>  dst_strd - 4 (row step after a 4-byte post-increment)
@   r6 =>  &src[24]

    .global ih264_intra_pred_luma_8x8_mode_diag_dl_a9q

ih264_intra_pred_luma_8x8_mode_diag_dl_a9q:

    push          {r4-r12, lr}          @ NOTE(review): only r5 and r6 of these are written below

    add           r0, r0, #9            @ r0 = &T[0]
    sub           r5, r3, #4
    add           r6, r0, #15           @ r6 = &T[15]
    vld1.8        {q0}, [r0]            @ q0 = T[0..15]
    vext.8        q2, q0, q0, #2        @ q2 = T[2..15] followed by two wrapped lanes
    vext.8        q1, q0, q0, #1        @ q1 = T[1..15] followed by one wrapped lane
    vld1.8        {d5[6]}, [r6]         @ q2 lane 14 = T[15], which yields F[14] above

    vaddl.u8      q10, d0, d2           @ T[i] + T[i + 1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ T[i + 1] + T[i + 2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ T[i] + 2 * T[i + 1] + T[i + 2]
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q12, #2           @ q2 = F[0..15] (lane 15 is never stored)
    vqrshrun.s16  d5, q13, #2

    vst1.8        {d4}, [r1], r3        @ row 0 = F[0..7]
    vext.8        q9, q2, q2, #1        @ q9  = F shifted by 1
    vext.8        q8, q9, q9, #1        @ q8  = F shifted by 2
    vst1.8        {d18}, [r1], r3       @ row 1 = F[1..8]
    vext.8        q15, q8, q8, #1       @ q15 = F shifted by 3
    vst1.8        {d16}, [r1], r3       @ row 2 = F[2..9]
    vst1.8        {d30}, [r1], r3       @ row 3 = F[3..10]
    @ Rows 4..7 reuse the same four registers, taking the middle eight bytes
    @ of each as two 32-bit lanes.
    vst1.32       {d4[1]}, [r1]!        @ row 4 = F[4..11]
    vst1.32       {d5[0]}, [r1], r5
    vst1.32       {d18[1]}, [r1]!       @ row 5 = F[5..12]
    vst1.32       {d19[0]}, [r1], r5
    vst1.32       {d16[1]}, [r1]!       @ row 6 = F[6..13]
    vst1.32       {d17[0]}, [r1], r5
    vst1.32       {d30[1]}, [r1]!       @ row 7 = F[7..14]
    vst1.32       {d31[0]}, [r1], r5


end_func_diag_dl:
    pop           {r4-r12, pc}
499
500
501
502
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_diag_dr_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Right
@*  (sec 8.3.2.2.5)
@*
@* @par Description:
@*  With F[i] = (src[i] + 2 * src[i + 1] + src[i + 2] + 2) >> 2, i = 0..14,
@*  the block is dst[y][x] = F[7 - y + x].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[0..16] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r3 =>  dst_strd
@   r5 =>  dst_strd - 4 (row step after a 4-byte post-increment)


    .global ih264_intra_pred_luma_8x8_mode_diag_dr_a9q

ih264_intra_pred_luma_8x8_mode_diag_dr_a9q:

    push          {r4-r12, lr}          @ NOTE(review): only r5 of these is written below


    vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = src[2..16] followed by one wrapped lane

    vaddl.u8      q10, d0, d2           @ src[i] + src[i + 1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ src[i + 1] + src[i + 2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ src[i] + 2 * src[i + 1] + src[i + 2]
    vadd.u16      q13, q11, q13
    vqrshrun.s16  d4, q12, #2           @ q2 = F[0..15] (lane 15 is never stored)
    vqrshrun.s16  d5, q13, #2

    sub           r5, r3, #4
    vext.8        q9, q2, q2, #15       @ q9  lane i = F[i - 1]
    vst1.8        {d19}, [r1], r3       @ row 0 = F[7..14]
    vext.8        q8, q9, q9, #15       @ q8  lane i = F[i - 2]
    vst1.8        {d17}, [r1], r3       @ row 1 = F[6..13]
    vext.8        q15, q8, q8, #15      @ q15 lane i = F[i - 3]
    vst1.8        {d31}, [r1], r3       @ row 2 = F[5..12]
    @ Rows 3..6 take the middle eight bytes of the same registers as two
    @ 32-bit lanes.
    vst1.32       {d4[1]}, [r1]!        @ row 3 = F[4..11]
    vst1.32       {d5[0]}, [r1], r5
    vst1.32       {d18[1]}, [r1]!       @ row 4 = F[3..10]
    vst1.32       {d19[0]}, [r1], r5
    vst1.32       {d16[1]}, [r1]!       @ row 5 = F[2..9]
    vst1.32       {d17[0]}, [r1], r5
    vst1.32       {d30[1]}, [r1]!       @ row 6 = F[1..8]
    vst1.32       {d31[0]}, [r1], r5
    vst1.8        {d4}, [r1], r3        @ row 7 = F[0..7]

end_func_diag_dr:
    pop           {r4-r12, pc}
590
591
592
593
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_vert_r_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Vertical_Right (sec 8.3.2.2.6)
@*
@* @par Description:
@*  With A[i] = (src[i] + src[i + 1] + 1) >> 1 and
@*       F[i] = (src[i] + 2 * src[i + 1] + src[i + 2] + 2) >> 2
@*  the rows written are:
@*      row 0 : A[8..15]
@*      row 1 : F[7..14]
@*      row 2 : F[6], A[8..14]
@*      row 3 : F[5], F[7], F[8..13]
@*      row 4 : F[4], F[6], A[8..13]
@*      row 5 : F[3], F[5], F[7], F[8..12]
@*      row 6 : F[2], F[4], F[6], A[8..12]
@*      row 7 : F[1], F[3], F[5], F[7], F[8..11]
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[0..16] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r3 =>  dst_strd
@   r5 =>  dst_strd - 6 (row step after 6 bytes of post-increment)
@   r6 =>  dst_strd - 4 (row step after 4 bytes of post-increment)


    .global ih264_intra_pred_luma_8x8_mode_vert_r_a9q

ih264_intra_pred_luma_8x8_mode_vert_r_a9q:

    push          {r4-r12, lr}          @ NOTE(review): only r5 and r6 of these are written below

    vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = src[2..16] followed by one wrapped lane

    vaddl.u8      q10, d0, d2           @ src[i] + src[i + 1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ src[i + 1] + src[i + 2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ src[i] + 2 * src[i + 1] + src[i + 2]
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1           @ q2 = A[0..15]
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2           @ q3 = F[0..15] (lane 15 is never stored)
    vqrshrun.s16  d7, q13, #2

    sub           r5, r3, #6
    sub           r6, r3, #4
    vst1.8        {d5}, [r1], r3        @ row 0 = A[8..15]
    vext.8        q9, q3, q3, #15       @ q9 lane i = F[i - 1]
    vmov.8        q11, q9               @ q11 keeps that copy; q9 is overwritten by the vuzp below
    vext.8        q8, q2, q2, #1        @ q8 lane i = A[i + 1]
    vst1.8        {d19}, [r1], r3       @ row 1 = F[7..14]

    vmov.8        q15, q8               @ q15 keeps A[i + 1]; q8 is overwritten by the vuzp below
    vext.8        q10, q2, q2, #15      @ q10 lane i = A[i - 1]
    vuzp.8        q8, q9                @ de-interleave the q8:q9 pair:
                                        @   d17 = F[15], F[1], F[3], F[5], F[7], ...
                                        @   d19 = F[0],  F[2], F[4], F[6], F[8], ...
    @ row 2 = F[6], A[8..14]: full row first, then byte 0 is patched
    vext.8        q14, q8, q8, #1       @ d29 = F[1], F[3], F[5], F[7], ...
    vst1.8        {d21}, [r1]           @ A[7..14]
    vst1.8        {d6[6]}, [r1], r3     @ overwrite A[7] with F[6]

    @ row 3 = F[5], F[7], F[8..13]
    vst1.16       {d29[1]}, [r1]!
    vst1.32       {d7[0]}, [r1]!
    vst1.16       {d7[2]}, [r1], r5

    @ row 4 = F[4], F[6], A[8..13]
    vst1.16       {d19[1]}, [r1]!
    vst1.32       {d5[0]}, [r1]!
    vst1.16       {d5[2]}, [r1], r5

    @ row 5 = F[3], F[5], F[7], F[8..12]
    vext.8        q13, q9, q9, #1       @ d27 = F[2], F[4], F[6], ... (consumed by row 6)
    vst1.16       {d17[1]}, [r1]!
    vst1.32       {d23[0]}, [r1]!
    vst1.16       {d23[2]}, [r1], r5

    @ row 6 = F[2], F[4], F[6], A[8..12]
    vst1.16       {d27[0]}, [r1]!
    vst1.8        {d27[2]}, [r1]!
    vst1.8        {d5[0]}, [r1]!
    vst1.32       {d31[0]}, [r1], r6

    @ row 7 = F[1], F[3], F[5], F[7], F[8..11]
    vst1.32       {d29[0]}, [r1]!
    vst1.32       {d7[0]}, [r1]!



end_func_vert_r:
    pop           {r4-r12, pc}
710
711
712
713
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_horz_d_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Horizontal_Down (sec 8.3.2.2.7)
@*
@* @par Description:
@*  With A[i] = (src[i] + src[i + 1] + 1) >> 1 and
@*       F[i] = (src[i] + 2 * src[i + 1] + src[i + 2] + 2) >> 2
@*  the rows written are:
@*      row 0 : A[7], F[7], F[8..13]
@*      row 1 : A[6], F[6], A[7], F[7], F[8..11]
@*      row 2 : A[5], F[5], A[6], F[6], A[7], F[7], F[8], F[9]
@*      row y : A[7-y], F[7-y], A[8-y], F[8-y], A[9-y], F[9-y], A[10-y], F[10-y]
@*              for y = 3..7
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[0..16] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r3 =>  dst_strd
@   r5 =>  dst_strd - 4 (row step after 4 bytes of post-increment)
@   r6 =>  dst_strd - 6 (row step after 6 bytes of post-increment)

    .global ih264_intra_pred_luma_8x8_mode_horz_d_a9q

ih264_intra_pred_luma_8x8_mode_horz_d_a9q:

    push          {r4-r12, lr}          @ NOTE(review): only r5 and r6 of these are written below
    vpush         {d8-d15}              @ q4-q7 are written below and are callee-saved

    vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    add           r0, r0, #1
    vld1.u8       {q1}, [r0]            @ q1 = src[1..16]
    vext.8        q2, q1, q1, #1        @ q2 = src[2..16] followed by one wrapped lane

    vaddl.u8      q10, d0, d2           @ src[i] + src[i + 1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ src[i + 1] + src[i + 2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ src[i] + 2 * src[i + 1] + src[i + 2]
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1           @ q2 = A[0..15]
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2           @ q3 = F[0..15] (lanes 14 and 15 are never stored)
    vqrshrun.s16  d7, q13, #2

    vmov.8        q4, q2
    vmov.8        q5, q3
    sub           r6, r3, #6
    vtrn.8        q4, q5                @ q4 = A0,F0,A2,F2,...   q5 = A1,F1,A3,F3,...
    vmov.8        q6, q4
    vmov.8        q7, q5
    sub           r5, r3, #4
    vtrn.16       q6, q7                @ d12 = A0,F0,A1,F1,A4,F4,A5,F5
                                        @ d14 = A2,F2,A3,F3,A6,F6,A7,F7
    vext.8        q8, q3, q3, #14       @ d17 = F[6..13]

    @ row 0 = A7,F7,F8..F13: full row first, then the first two bytes are patched
    vst1.8        {d17}, [r1]
    vst1.16       {d10[3]}, [r1], r3    @ A7,F7 over F6,F7

    @ row 1 = A6,F6,A7,F7,F8..F11
    vst1.32       {d14[1]}, [r1]!
    vst1.32       {d7[0]}, [r1], r5

    @ row 2 = A5,F5,A6,F6,A7,F7,F8,F9
    vst1.16       {d10[2]}, [r1]!
    vst1.32       {d14[1]}, [r1]!
    vst1.16       {d7[0]}, [r1], r6

    @ row 3 = A4,F4,A5,F5,A6,F6,A7,F7
    vst1.32       {d12[1]}, [r1]!
    vst1.32       {d14[1]}, [r1], r5

    @ row 4 = A3,F3,A4,F4,A5,F5,A6,F6
    vst1.16       {d14[1]}, [r1]!
    vst1.32       {d12[1]}, [r1]!
    vst1.16       {d14[2]}, [r1], r6

    @ row 5 = A2,F2,A3,F3,A4,F4,A5,F5
    vst1.32       {d14[0]}, [r1]!
    vst1.32       {d12[1]}, [r1], r5

    @ row 6 = A1,F1,A2,F2,A3,F3,A4,F4
    vst1.16       {d10[0]}, [r1]!
    vst1.16       {d8[1]}, [r1]!
    vst1.16       {d14[1]}, [r1]!
    vst1.16       {d12[2]}, [r1], r6

    @ row 7 = A0,F0,A1,F1,A2,F2,A3,F3
    vst1.32       {d12[0]}, [r1]!
    vst1.32       {d14[0]}, [r1], r5

end_func_horz_d:
    vpop          {d8-d15}
    pop           {r4-r12, pc}
827
828
829
830
831
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_vert_l_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Vertical_Left (sec 8.3.2.2.8)
@*
@* @par Description:
@*  With T[i] = src[9 + i] and
@*      A[i] = (T[i] + T[i + 1] + 1) >> 1
@*      F[i] = (T[i] + 2 * T[i + 1] + T[i + 2] + 2) >> 2
@*  the rows written are, for k = 0..3:
@*      row 2k     : A[k .. k + 7]
@*      row 2k + 1 : F[k .. k + 7]
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[9..24] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@* @remarks
@*  Only A[0..10] and F[0..10] reach the output, so the lanes that wrap around
@*  in the vext instructions below never influence a stored byte.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_dst,
@                                            WORD32 src_strd,
@                                            WORD32 dst_strd,
@                                            WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src (moved to &src[9])
@   r1 => *pu1_dst (advanced by dst_strd after every row)
@   r3 =>  dst_strd


    .global ih264_intra_pred_luma_8x8_mode_vert_l_a9q

ih264_intra_pred_luma_8x8_mode_vert_l_a9q:

    vpush         {d8-d15}              @ q4-q7 are written below and are callee-saved

    add           r0, r0, #9            @ r0 = &T[0]
    vld1.u8       {q0}, [r0]            @ q0 = T[0..15] = src[9..24]
    vext.8        q1, q0, q0, #1        @ q1 = T[1..15] + one wrapped lane; built from q0
                                        @ so that src[25] is never read
    vext.8        q2, q1, q1, #1        @ q2 = T[2..15] + two wrapped lanes
    vaddl.u8      q10, d0, d2           @ T[i] + T[i + 1]
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4           @ T[i + 1] + T[i + 2]
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ T[i] + 2 * T[i + 1] + T[i + 2]
    vadd.u16      q13, q11, q13

    vqrshrun.s16  d4, q10, #1           @ q2 = A[0..15]
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2           @ q3 = F[0..15]
    vext.8        q4, q2, q2, #1        @ q4 lane i = A[i + 1]
    vqrshrun.s16  d7, q13, #2

    vext.8        q5, q3, q3, #1        @ q5 lane i = F[i + 1]
    @ROW 0,1
    vst1.8        {d4}, [r1], r3        @ A[0..7]
    vst1.8        {d6}, [r1], r3        @ F[0..7]

    vext.8        q6, q4, q4, #1        @ q6 lane i = A[i + 2]
    vext.8        q7, q5, q5, #1        @ q7 lane i = F[i + 2]
    @ROW 2,3
    vst1.8        {d8}, [r1], r3        @ A[1..8]
    vst1.8        {d10}, [r1], r3       @ F[1..8]

    vext.8        q8, q6, q6, #1        @ q8 lane i = A[i + 3]
    vext.8        q9, q7, q7, #1        @ q9 lane i = F[i + 3]
    @ROW 4,5
    vst1.8        {d12}, [r1], r3       @ A[2..9]
    vst1.8        {d14}, [r1], r3       @ F[2..9]
    @ROW 6,7
    vst1.8        {d16}, [r1], r3       @ A[3..10]
    vst1.8        {d18}, [r1], r3       @ F[3..10]

end_func_vert_l:
    vpop          {d8-d15}
    bx            lr                    @ leaf routine: no core register was spilled
928
929
930
931
932
@**
@*******************************************************************************
@*
@* ih264_intra_pred_luma_8x8_mode_horz_u_a9q
@*
@* @brief
@*  Perform Intra prediction for luma_8x8 mode: Horizontal_Up (sec 8.3.2.2.9)
@*
@* @par Description:
@*  Builds the rounded 2-tap averages (q2) and 3-tap [1 2 1] values (q3) of
@*  src[0..15], with lane 15 of the input replaced by src[0]. A 16-byte row
@*  "P" is then gathered from those 32 bytes with VTBL, using the indices in
@*  ih264_gai1_intrapred_luma_8x8_horz_u. Row y of the block is the 8-byte
@*  window P[2y .. 2y + 7]; positions past the end of P are filled with
@*  src[0].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the neighbour samples; src[0..15] are read
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination block
@*
@* @param[in] src_strd
@*  integer source stride (not read by this routine)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this routine)
@*
@* @returns
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
@                                           UWORD8 *pu1_dst,
@                                           WORD32 src_strd,
@                                           WORD32 dst_strd,
@                                           WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0  => *pu1_src
@   r1  => *pu1_dst (advanced by dst_strd after every row)
@   r3  =>  dst_strd
@   r12 =>  address of ih264_gai1_intrapred_luma_8x8_horz_u

    .global ih264_intra_pred_luma_8x8_mode_horz_u_a9q

ih264_intra_pred_luma_8x8_mode_horz_u_a9q:

    push          {r4-r12, lr}          @ NOTE(review): only r12 of these is written below
    vpush         {d8-d15}              @ q5-q7 are written below and are callee-saved

    vld1.u8       {q0}, [r0]            @ q0 = src[0..15]
    vld1.u8       {d1[7]}, [r0]         @ lane 15 = src[0], so lane 15 of q2 below is src[0]
                                        @ and lane 15 of q3 is (3 * src[0] + src[1] + 2) >> 2
    vext.8        q1, q0, q0, #1        @ q1 lane i = q0 lane (i + 1) mod 16
    vext.8        q2, q1, q1, #1        @ q2 lane i = q0 lane (i + 2) mod 16

    @ Table address = literal + pc; the literal already accounts for pc
    @ reading as (address of the add) + 8.
    ldr           r12, scratch_intrapred_addr_8x8
scrlb8x8l2:
    add           r12, r12, pc
    vaddl.u8      q10, d0, d2           @ two-tap sums
    vaddl.u8      q11, d1, d3
    vaddl.u8      q12, d2, d4
    vaddl.u8      q13, d3, d5
    vadd.u16      q12, q10, q12         @ three-tap [1 2 1] sums
    vadd.u16      q13, q11, q13
    vld1.u8       {q5}, [r12]           @ q5 = 16 VTBL indices
    vqrshrun.s16  d4, q10, #1           @ q2 = rounded 2-tap averages
    vqrshrun.s16  d5, q11, #1
    vqrshrun.s16  d6, q12, #2           @ q3 = rounded [1 2 1] values
    vqrshrun.s16  d7, q13, #2

    vtbl.u8       d12, {q2, q3}, d10    @ P[0..7]
    vdup.u8       q7, d5[7]             @ q7 = fill value (q2 lane 15 = src[0])
    vtbl.u8       d13, {q2, q3}, d11    @ P[8..15]
    vext.8        q8, q6, q7, #2        @ q8  = P[2..15]  + 2 fill bytes
    vext.8        q9, q8, q7, #2        @ q9  = P[4..15]  + 4 fill bytes
    vst1.8        {d12}, [r1], r3       @ row 0 = P[0..7]
    vext.8        q10, q9, q7, #2       @ q10 = P[6..15]  + 6 fill bytes
    vst1.8        {d16}, [r1], r3       @ row 1 = P[2..9]
    vst1.8        {d18}, [r1], r3       @ row 2 = P[4..11]
    vst1.8        {d20}, [r1], r3       @ row 3 = P[6..13]
    vst1.8        {d13}, [r1], r3       @ row 4 = P[8..15]
    vst1.8        {d17}, [r1], r3       @ row 5 = P[10..15] + 2 fill bytes
    vst1.8        {d19}, [r1], r3       @ row 6 = P[12..15] + 4 fill bytes
    vst1.8        {d21}, [r1], r3       @ row 7 = P[14..15] + 6 fill bytes


end_func_horz_u:
    vpop          {d8-d15}
    pop           {r4-r12, pc}
1025
1026
1027
1028
1029
1030
1031
1032
1033