1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20@**
21@******************************************************************************
22@* @file
23@*  ih264_intra_pred_luma_4x4_a9q.s
24@*
25@* @brief
@*  Contains function definitions for intra 4x4 luma prediction.
27@*
28@* @author
29@*  Ittiam
30@*
31@* @par List of Functions:
32@*
33@*  -ih264_intra_pred_luma_4x4_mode_vert_a9q
34@*  -ih264_intra_pred_luma_4x4_mode_horz_a9q
35@*  -ih264_intra_pred_luma_4x4_mode_dc_a9q
36@*  -ih264_intra_pred_luma_4x4_mode_diag_dl_a9q
37@*  -ih264_intra_pred_luma_4x4_mode_diag_dr_a9q
38@*  -ih264_intra_pred_luma_4x4_mode_vert_r_a9q
39@*  -ih264_intra_pred_luma_4x4_mode_horz_d_a9q
40@*  -ih264_intra_pred_luma_4x4_mode_vert_l_a9q
41@*  -ih264_intra_pred_luma_4x4_mode_horz_u_a9q
42@*
43@* @remarks
44@*  None
45@*
46@*******************************************************************************
47@*
48
49@* All the functions here are replicated from ih264_intra_pred_filters.c
50@
51
52.text
53.p2align 2
54
55
56@**
57@*******************************************************************************
58@*
59@*ih264_intra_pred_luma_4x4_mode_vert
60@*
61@* @brief
62@*  Perform Intra prediction for  luma_4x4 mode:vertical
63@*
64@* @par Description:
65@* Perform Intra prediction for  luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
66@*
67@* @param[in] pu1_src
68@*  UWORD8 pointer to the source
69@*
70@* @param[out] pu1_dst
71@*  UWORD8 pointer to the destination
72@*
73@* @param[in] src_strd
74@*  integer source stride
75@*
76@* @param[in] dst_strd
77@*  integer destination stride
78@*
79@* @param[in] ui_neighboravailability
80@* availability of neighbouring pixels(Not used in this function)
81@*
82@* @returns
83@*
84@* @remarks
85@*  None
86@*
87@*******************************************************************************
88@void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
89@                                        UWORD8 *pu1_dst,
90@                                        WORD32 src_strd,
91@                                        WORD32 dst_strd,
92@                                        WORD32 ui_neighboravailability)
93
94@**************Variables Vs Registers*****************************************
95@   r0 => *pu1_src
96@   r1 => *pu1_dst
97@   r2 =>  src_strd
98@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
100
101    .global ih264_intra_pred_luma_4x4_mode_vert_a9q
102
ih264_intra_pred_luma_4x4_mode_vert_a9q:

    @ Writes the four bytes pu1_src[5..8] to each of the four rows of the
    @ destination block.
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) and the stacked ui_neighboravailability are not read.
    @
    @ Only r0, r1, r3 and d0 are modified. All of them are caller-saved under
    @ the AAPCS, so this leaf routine needs no stack frame.

    add           r0, r0, #5            @ r0 -> pu1_src[5]
    vld1.32       {d0[0]}, [r0]         @ d0[31:0] = pu1_src[5..8]

    vst1.32       {d0[0]}, [r1], r3     @ row 0, then pu1_dst += dst_strd
    vst1.32       {d0[0]}, [r1], r3     @ row 1
    vst1.32       {d0[0]}, [r1], r3     @ row 2
    vst1.32       {d0[0]}, [r1]         @ row 3 (no further pointer update needed)

    bx            lr                    @ nothing was stacked, return directly
121
122
123
124
125
126@******************************************************************************
127
128
129@**
130@*******************************************************************************
131@*
132@*ih264_intra_pred_luma_4x4_mode_horz
133@*
134@* @brief
135@*  Perform Intra prediction for  luma_4x4 mode:horizontal
136@*
137@* @par Description:
138@*  Perform Intra prediction for  luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
139@*
140@* @param[in] pu1_src
141@*  UWORD8 pointer to the source
142@*
143@* @param[out] pu1_dst
144@*  UWORD8 pointer to the destination
145@*
146@* @param[in] src_strd
147@*  integer source stride
148@*
149@* @param[in] dst_strd
150@*  integer destination stride
151@*
152@* @param[in] ui_neighboravailability
153@* availability of neighbouring pixels(Not used in this function)
154@*
155@* @returns
156@*
157@* @remarks
158@*  None
159@*
160@*******************************************************************************
161@*
162@void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
163@                                         UWORD8 *pu1_dst,
164@                                         WORD32 src_strd,
165@                                         WORD32 dst_strd,
166@                                         WORD32 ui_neighboravailability)
167@**************Variables Vs Registers*****************************************
168@   r0 => *pu1_src
169@   r1 => *pu1_dst
170@   r2 =>  src_strd
171@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
173
174
175
176    .global ih264_intra_pred_luma_4x4_mode_horz_a9q
177
ih264_intra_pred_luma_4x4_mode_horz_a9q:

    @ Fills row n of the 4x4 destination block with the single byte
    @ pu1_src[3 - n] (row 0 <- pu1_src[3] ... row 3 <- pu1_src[0]).
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; it is reused as the -1 step.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Each source byte is loaded straight into every lane of a D register, so
    @ no core register is needed to carry pixel data and nothing callee-saved
    @ is touched. The routine therefore needs no stack frame.

    add           r0, r0, #3            @ r0 -> pu1_src[3]
    mov           r2, #-1               @ walk the source backwards, one byte per row

    vld1.8        {d0[]}, [r0], r2      @ d0 = pu1_src[3] in all lanes
    vld1.8        {d1[]}, [r0], r2      @ d1 = pu1_src[2] in all lanes
    vld1.8        {d2[]}, [r0], r2      @ d2 = pu1_src[1] in all lanes
    vld1.8        {d3[]}, [r0]          @ d3 = pu1_src[0] in all lanes

    vst1.32       {d0[0]}, [r1], r3     @ row 0, then pu1_dst += dst_strd
    vst1.32       {d1[0]}, [r1], r3     @ row 1
    vst1.32       {d2[0]}, [r1], r3     @ row 2
    vst1.32       {d3[0]}, [r1]         @ row 3

    bx            lr                    @ nothing was stacked, return directly
201
202
203
204
205
206
207
208@******************************************************************************
209
210
211@**
212@*******************************************************************************
213@*
214@*ih264_intra_pred_luma_4x4_mode_dc
215@*
216@* @brief
217@*  Perform Intra prediction for  luma_4x4 mode:DC
218@*
219@* @par Description:
220@*  Perform Intra prediction for  luma_4x4 mode:DC ,described in sec 8.3.1.2.3
221@*
222@* @param[in] pu1_src
223@*  UWORD8 pointer to the source
224@*
225@* @param[out] pu1_dst
226@*  UWORD8 pointer to the destination
227@*
228@* @param[in] src_strd
229@*  integer source stride
230@*
231@* @param[in] dst_strd
232@*  integer destination stride
233@*
234@* @param[in] ui_neighboravailability
235@*  availability of neighbouring pixels
236@*
237@* @returns
238@*
239@* @remarks
240@*  None
241@*
242@*******************************************************************************
243@void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
244@                                       UWORD8 *pu1_dst,
245@                                       WORD32 src_strd,
246@                                       WORD32 dst_strd,
247@                                       WORD32 ui_neighboravailability)
248
249@**************Variables Vs Registers*****************************************
250@   r0 => *pu1_src
251@   r1 => *pu1_dst
252@   r2 =>  src_strd
253@   r3 =>  dst_strd
254@   r4 =>  ui_neighboravailability
255
256
257
258    .global ih264_intra_pred_luma_4x4_mode_dc_a9q
259
ih264_intra_pred_luma_4x4_mode_dc_a9q:

    @ Fills the 4x4 destination block with one value selected by
    @ ui_neighboravailability (bit 0 = left available, bit 2 = top available):
    @
    @   left and top : (pu1_src[0..3] summed + pu1_src[5..8] summed + 4) >> 3
    @   left only    : (pu1_src[0..3] summed + 2) >> 2
    @   top only     : (pu1_src[5..8] summed + 2) >> 2
    @   neither      : 128
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd, [sp] = ui_neighboravailability
    @   r2 (src_strd) is not read as an input; r2 and r12 are used as scratch.
    @
    @ All four cases share one store sequence at .Ldc_4x4_fill.

    stmfd         sp!, {r4-r6, r14}     @ only r4-r6 are callee-saved registers used here (4 words)
    ldr           r4, [sp, #16]         @ r4 = ui_neighboravailability, just above the 16 saved bytes

    tst           r4, #0x01             @ left neighbours available?
    beq           .Ldc_4x4_left_missing

    ldrb          r5, [r0, #3]
    ldrb          r6, [r0, #2]
    ldrb          r2, [r0, #1]
    ldrb          r12, [r0]
    add           r5, r5, r6
    add           r2, r2, r12
    add           r5, r5, r2            @ r5 = pu1_src[0] + pu1_src[1] + pu1_src[2] + pu1_src[3]
    tst           r4, #0x04             @ top neighbours available as well?
    beq           .Ldc_4x4_round_sum_of_4   @ no: left only

    @ Left and top available: add the four top bytes and average over eight.
    ldrb          r6, [r0, #5]
    ldrb          r2, [r0, #6]
    ldrb          r12, [r0, #7]
    ldrb          r4, [r0, #8]          @ the flags in r4 are no longer needed
    add           r5, r5, r6
    add           r2, r2, r12
    add           r5, r5, r2
    add           r5, r5, r4            @ r5 = left sum + top sum
    add           r5, r5, #4            @ rounding term for a divide by 8
    lsr           r5, r5, #3
    b             .Ldc_4x4_fill

.Ldc_4x4_left_missing:
    tst           r4, #0x04             @ top neighbours available?
    beq           .Ldc_4x4_no_neighbours

    @ Top only.
    ldrb          r5, [r0, #5]
    ldrb          r6, [r0, #6]
    ldrb          r2, [r0, #7]
    ldrb          r12, [r0, #8]
    add           r5, r5, r6
    add           r2, r2, r12
    add           r5, r5, r2            @ r5 = pu1_src[5] + pu1_src[6] + pu1_src[7] + pu1_src[8]

.Ldc_4x4_round_sum_of_4:                @ r5 = sum of the four available neighbours
    add           r5, r5, #2            @ rounding term for a divide by 4
    lsr           r5, r5, #2
    b             .Ldc_4x4_fill

.Ldc_4x4_no_neighbours:
    mov           r5, #128              @ fixed value when nothing is available

.Ldc_4x4_fill:                          @ r5 = value to replicate over the block
    vdup.8        d0, r5
    vst1.32       {d0[0]}, [r1], r3     @ row 0, then pu1_dst += dst_strd
    vst1.32       {d0[0]}, [r1], r3     @ row 1
    vst1.32       {d0[0]}, [r1], r3     @ row 2
    vst1.32       {d0[0]}, [r1]         @ row 3

    ldmfd         sp!, {r4-r6, pc}      @ restore registers and return
343
344
345
346
347
348
349
350@**
351@*******************************************************************************
352@*
353@*ih264_intra_pred_luma_4x4_mode_diag_dl
354@*
355@* @brief
356@*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left
357@*
358@* @par Description:
359@*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
360@*
361@* @param[in] pu1_src
362@*  UWORD8 pointer to the source
363@*
364@* @param[out] pu1_dst
365@*  UWORD8 pointer to the destination
366@*
367@* @param[in] src_strd
368@*  integer source stride
369@*
370@* @param[in] dst_strd
371@*  integer destination stride
372@*
373@* @param[in] ui_neighboravailability
374@*  availability of neighbouring pixels
375@*
376@* @returns
377@*
378@* @remarks
379@*  None
380@*
381@*******************************************************************************
382@void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
383@                                            UWORD8 *pu1_dst,
384@                                            WORD32 src_strd,
385@                                            WORD32 dst_strd,
386@                                            WORD32 ui_neighboravailability)
387
388@**************Variables Vs Registers*****************************************
389@   r0 => *pu1_src
390@   r1 => *pu1_dst
391@   r2 =>  src_strd
392@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
394
395
396    .global ih264_intra_pred_luma_4x4_mode_diag_dl_a9q
397
ih264_intra_pred_luma_4x4_mode_diag_dl_a9q:

    @ With t[i] = pu1_src[5 + i] (i = 0..7) this computes
    @     f[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2      for i = 0..5
    @     f[6] = (t[6] + 2*t[7] + t[7] + 2) >> 2
    @ and writes row n of the 4x4 block as f[n], f[n+1], f[n+2], f[n+3].
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; r2 and r12 are used as scratch.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Only AAPCS caller-saved registers are modified (r0-r3, r12, d0-d4,
    @ q10-q12), so this leaf routine needs no stack frame.

    add           r0, r0, #5            @ r0 -> t[0]
    sub           r2, r3, #2            @ r2 = dst_strd - 2: step to the next row after a 2-byte advance
    add           r12, r0, #7           @ r12 -> t[7]
    vld1.8        {d0}, [r0]            @ d0 = t0 t1 t2 t3 t4 t5 t6 t7
    vext.8        d1, d0, d0, #1        @ d1 = t1 t2 t3 t4 t5 t6 t7 t0
    vext.8        d2, d0, d0, #2        @ d2 = t2 t3 t4 t5 t6 t7 t0 t1
    vld1.8        {d2[6]}, [r12]        @ d2[6] = t7, so lane 6 filters (t6, t7, t7)
    vaddl.u8      q10, d0, d1           @ t[i] + t[i+1]
    vaddl.u8      q11, d1, d2           @ t[i+1] + t[i+2]
    vadd.u16      q12, q10, q11         @ t[i] + 2*t[i+1] + t[i+2]
    vqrshrun.s16  d3, q12, #2           @ d3 = f0 .. f6 (lane 7 is not used)
    vext.8        d4, d3, d3, #1        @ d4 = f1 .. f6, ...

    vst1.32       {d3[0]}, [r1], r3     @ row 0 = f0 f1 f2 f3
    vst1.32       {d4[0]}, [r1], r3     @ row 1 = f1 f2 f3 f4
    vst1.16       {d3[1]}, [r1]!        @ row 2 = f2 f3 ...
    vst1.16       {d3[2]}, [r1], r2     @         ... f4 f5
    vst1.16       {d4[1]}, [r1]!        @ row 3 = f3 f4 ...
    vst1.16       {d4[2]}, [r1]         @         ... f5 f6

    bx            lr                    @ nothing was stacked, return directly
423
424
425
426
427
428
429
430
431
432@**
433@*******************************************************************************
434@*
435@*ih264_intra_pred_luma_4x4_mode_diag_dr
436@*
437@* @brief
438@* Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right
439@*
440@* @par Description:
441@*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
442@*
443@* @param[in] pu1_src
444@*  UWORD8 pointer to the source
445@*
446@* @param[out] pu1_dst
447@*  UWORD8 pointer to the destination
448@*
449@* @param[in] src_strd
450@*  integer source stride
451@*
452@* @param[in] dst_strd
453@*  integer destination stride
454@*
455@* @param[in] ui_neighboravailability
456@*  availability of neighbouring pixels
457@*
458@* @returns
459@*
460@* @remarks
461@*  None
462@*
463@*******************************************************************************
464@void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
465@                                            UWORD8 *pu1_dst,
466@                                            WORD32 src_strd,
467@                                            WORD32 dst_strd,
468@                                            WORD32 ui_neighboravailability)
469
470@**************Variables Vs Registers*****************************************
471@   r0 => *pu1_src
472@   r1 => *pu1_dst
473@   r2 =>  src_strd
474@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
476
477
478    .global ih264_intra_pred_luma_4x4_mode_diag_dr_a9q
479
ih264_intra_pred_luma_4x4_mode_diag_dr_a9q:

    @ With s[i] = pu1_src[i] (i = 0..8) this computes
    @     f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      for i = 0..6
    @ and writes row n of the 4x4 block as f[3-n], f[4-n], f[5-n], f[6-n].
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; r2 is used as scratch.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Only AAPCS caller-saved registers are modified (r0-r3, d0-d4, q10-q12),
    @ so this leaf routine needs no stack frame.

    sub           r2, r3, #2            @ r2 = dst_strd - 2: step to the next row after a 2-byte advance
    vld1.8        {d0}, [r0]            @ d0 = s0 .. s7
    add           r0, r0, #1
    vld1.8        {d1}, [r0]            @ d1 = s1 .. s8
    vext.8        d2, d1, d1, #1        @ d2 = s2 .. s8, s1 (lane 7 only feeds the unused f[7])
    vaddl.u8      q10, d0, d1           @ s[i] + s[i+1]
    vaddl.u8      q11, d1, d2           @ s[i+1] + s[i+2]
    vadd.u16      q12, q10, q11         @ s[i] + 2*s[i+1] + s[i+2]
    vqrshrun.s16  d3, q12, #2           @ d3 = f0 .. f6 (lane 7 is not used)
    vext.8        d4, d3, d3, #1        @ d4 = f1 .. f6, ...

    vst1.16       {d4[1]}, [r1]!        @ row 0 = f3 f4 ...
    vst1.16       {d4[2]}, [r1], r2     @         ... f5 f6
    vst1.16       {d3[1]}, [r1]!        @ row 1 = f2 f3 ...
    vst1.16       {d3[2]}, [r1], r2     @         ... f4 f5
    vst1.32       {d4[0]}, [r1], r3     @ row 2 = f1 f2 f3 f4
    vst1.32       {d3[0]}, [r1]         @ row 3 = f0 f1 f2 f3

    bx            lr                    @ nothing was stacked, return directly
505
506
507
508
509
510
511
512@**
513@*******************************************************************************
514@*
515@*ih264_intra_pred_luma_4x4_mode_vert_r
516@*
517@* @brief
518@* Perform Intra prediction for  luma_4x4 mode:Vertical_Right
519@*
520@* @par Description:
521@*   Perform Intra prediction for  luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
522@*
523@* @param[in] pu1_src
524@*  UWORD8 pointer to the source
525@*
526@* @param[out] pu1_dst
527@*  UWORD8 pointer to the destination
528@*
529@* @param[in] src_strd
530@*  integer source stride
531@*
532@* @param[in] dst_strd
533@*  integer destination stride
534@*
535@* @param[in] ui_neighboravailability
536@*  availability of neighbouring pixels
537@*
538@* @returns
539@*
540@* @remarks
541@*  None
542@*
543@*******************************************************************************
544@void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
545@                                            UWORD8 *pu1_dst,
546@                                            WORD32 src_strd,
547@                                            WORD32 dst_strd,
548@                                            WORD32 ui_neighboravailability)
549
550@**************Variables Vs Registers*****************************************
551@   r0 => *pu1_src
552@   r1 => *pu1_dst
553@   r2 =>  src_strd
554@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
556
557
558    .global ih264_intra_pred_luma_4x4_mode_vert_r_a9q
559
ih264_intra_pred_luma_4x4_mode_vert_r_a9q:

    @ With s[i] = pu1_src[i] (i = 0..8) this computes
    @     a[i] = (s[i] + s[i+1] + 1) >> 1                  for i = 0..7
    @     f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      for i = 0..6
    @ and writes the 4x4 block as
    @     row 0 = a4 a5 a6 a7
    @     row 1 = f3 f4 f5 f6
    @     row 2 = f2 a4 a5 a6
    @     row 3 = f1 f3 f4 f5
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; r2 is used as scratch.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Only AAPCS caller-saved registers are modified (r0-r3, d0-d5, q10-q12),
    @ so this leaf routine needs no stack frame.

    sub           r2, r3, #3            @ r2 = dst_strd - 3: step to the next row after a 3-byte advance
    vld1.8        {d0}, [r0]            @ d0 = s0 .. s7
    add           r0, r0, #1
    vld1.8        {d1}, [r0]            @ d1 = s1 .. s8
    vext.8        d2, d1, d1, #1        @ d2 = s2 .. s8, s1 (lane 7 only feeds the unused f[7])
    vaddl.u8      q10, d0, d1           @ s[i] + s[i+1]
    vaddl.u8      q11, d1, d2           @ s[i+1] + s[i+2]
    vadd.u16      q12, q10, q11         @ s[i] + 2*s[i+1] + s[i+2]
    vqrshrun.s16  d4, q10, #1           @ d4 = a0 .. a7
    vqrshrun.s16  d3, q12, #2           @ d3 = f0 .. f6 (lane 7 is not used)
    vext.8        d5, d3, d3, #3        @ d5 = f3 f4 f5 f6 ...

    vst1.32       {d4[1]}, [r1], r3     @ row 0 = a4 a5 a6 a7
    vst1.32       {d5[0]}, [r1], r3     @ row 1 = f3 f4 f5 f6
    vst1.8        {d3[2]}, [r1]!        @ row 2 = f2 ...
    vst1.16       {d4[2]}, [r1]!        @         ... a4 a5 ...
    vst1.8        {d4[6]}, [r1], r2     @         ... a6
    vst1.8        {d3[1]}, [r1]!        @ row 3 = f1 ...
    vst1.16       {d5[0]}, [r1]!        @         ... f3 f4 ...
    vst1.8        {d5[2]}, [r1]         @         ... f5

    bx            lr                    @ nothing was stacked, return directly
589
590
591
592
593
594@**
595@*******************************************************************************
596@*
597@*ih264_intra_pred_luma_4x4_mode_horz_d
598@*
599@* @brief
600@* Perform Intra prediction for  luma_4x4 mode:Horizontal_Down
601@*
602@* @par Description:
603@*   Perform Intra prediction for  luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
604@*
605@* @param[in] pu1_src
606@*  UWORD8 pointer to the source
607@*
608@* @param[out] pu1_dst
609@*  UWORD8 pointer to the destination
610@*
611@* @param[in] src_strd
612@*  integer source stride
613@*
614@* @param[in] dst_strd
615@*  integer destination stride
616@*
617@* @param[in] ui_neighboravailability
618@*  availability of neighbouring pixels
619@*
620@* @returns
621@*
622@* @remarks
623@*  None
624@*
625@*******************************************************************************
626@void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
627@                                            UWORD8 *pu1_dst,
628@                                            WORD32 src_strd,
629@                                            WORD32 dst_strd,
630@                                            WORD32 ui_neighboravailability)
631
632@**************Variables Vs Registers*****************************************
633@   r0 => *pu1_src
634@   r1 => *pu1_dst
635@   r2 =>  src_strd
636@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
638
639
640    .global ih264_intra_pred_luma_4x4_mode_horz_d_a9q
641
ih264_intra_pred_luma_4x4_mode_horz_d_a9q:

    @ With s[i] = pu1_src[i] (i = 0..8) this computes
    @     a[i] = (s[i] + s[i+1] + 1) >> 1                  for i = 0..7
    @     f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2      for i = 0..6
    @ and writes the 4x4 block as
    @     row 0 = a3 f3 f4 f5
    @     row 1 = a2 f2 a3 f3
    @     row 2 = a1 f1 a2 f2
    @     row 3 = a0 f0 a1 f1
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; r2 is used as scratch.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Only AAPCS caller-saved registers are modified (r0-r3, d0-d6, q10-q12),
    @ so this leaf routine needs no stack frame.

    sub           r2, r3, #2            @ r2 = dst_strd - 2: step to the next row after a 2-byte advance
    vld1.8        {d0}, [r0]            @ d0 = s0 .. s7
    add           r0, r0, #1
    vld1.8        {d1}, [r0]            @ d1 = s1 .. s8
    vext.8        d2, d1, d0, #1        @ d2 = s2 .. s8, s0 (lane 7 only feeds the unused f[7])
    vaddl.u8      q10, d0, d1           @ s[i] + s[i+1]
    vaddl.u8      q11, d1, d2           @ s[i+1] + s[i+2]
    vadd.u16      q12, q10, q11         @ s[i] + 2*s[i+1] + s[i+2]
    vqrshrun.s16  d4, q10, #1           @ d4 = a0 .. a7
    vqrshrun.s16  d5, q12, #2           @ d5 = f0 .. f6 (lane 7 is not used)
    vmov          d6, d5                @ keep the un-interleaved f values for row 0
    vtrn.8        d4, d5                @ d4 = a0 f0 a2 f2 a4 f4 a6 f6
                                        @ d5 = a1 f1 a3 f3 a5 f5 a7 f7

    vst1.16       {d5[1]}, [r1]!        @ row 0 = a3 f3 ...
    vst1.16       {d6[2]}, [r1], r2     @         ... f4 f5
    vst1.16       {d4[1]}, [r1]!        @ row 1 = a2 f2 ...
    vst1.16       {d5[1]}, [r1], r2     @         ... a3 f3
    vst1.16       {d5[0]}, [r1]!        @ row 2 = a1 f1 ...
    vst1.16       {d4[1]}, [r1], r2     @         ... a2 f2
    vst1.16       {d4[0]}, [r1]!        @ row 3 = a0 f0 ...
    vst1.16       {d5[0]}, [r1]         @         ... a1 f1

    bx            lr                    @ nothing was stacked, return directly
669
670
671
672
673
674
675
676@**
677@*******************************************************************************
678@*
679@*ih264_intra_pred_luma_4x4_mode_vert_l
680@*
681@* @brief
682@*  Perform Intra prediction for  luma_4x4 mode:Vertical_Left
683@*
684@* @par Description:
685@*   Perform Intra prediction for  luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
686@*
687@* @param[in] pu1_src
688@*  UWORD8 pointer to the source
689@*
690@* @param[out] pu1_dst
691@*  UWORD8 pointer to the destination
692@*
693@* @param[in] src_strd
694@*  integer source stride
695@*
696@* @param[in] dst_strd
697@*  integer destination stride
698@*
699@* @param[in] ui_neighboravailability
700@*  availability of neighbouring pixels
701@*
702@* @returns
703@*
704@* @remarks
705@*  None
706@*
707@*******************************************************************************
708@void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
709@                                            UWORD8 *pu1_dst,
710@                                            WORD32 src_strd,
711@                                            WORD32 dst_strd,
712@                                            WORD32 ui_neighboravailability)
713
714@**************Variables Vs Registers*****************************************
715@   r0 => *pu1_src
716@   r1 => *pu1_dst
717@   r2 =>  src_strd
718@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
720
721
722    .global ih264_intra_pred_luma_4x4_mode_vert_l_a9q
723
ih264_intra_pred_luma_4x4_mode_vert_l_a9q:

    @ With u[i] = pu1_src[4 + i] (i = 0..8) this computes
    @     a[i] = (u[i] + u[i+1] + 1) >> 1                  for i = 0..7
    @     f[i] = (u[i] + 2*u[i+1] + u[i+2] + 2) >> 2      for i = 0..6
    @ and writes the 4x4 block as
    @     row 0 = a1 a2 a3 a4
    @     row 1 = f1 f2 f3 f4
    @     row 2 = a2 a3 a4 a5
    @     row 3 = f2 f3 f4 f5
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) and the stacked ui_neighboravailability are not read.
    @
    @ The loads start at pu1_src[4] and pu1_src[5], so the highest byte read
    @ is pu1_src[12].
    @
    @ Only AAPCS caller-saved registers are modified (r0, r1, d0-d7, d16, d17,
    @ q10-q12), so this leaf routine needs no stack frame.

    add           r0, r0, #4            @ r0 -> u[0] = pu1_src[4]
    vld1.8        {d0}, [r0]            @ d0 = u0 .. u7
    add           r0, r0, #1
    vld1.8        {d1}, [r0]            @ d1 = u1 .. u8
    vext.8        d2, d1, d0, #1        @ d2 = u2 .. u8, u0 (lane 7 only feeds the unused f[7])
    vaddl.u8      q10, d0, d1           @ u[i] + u[i+1]
    vaddl.u8      q11, d1, d2           @ u[i+1] + u[i+2]
    vadd.u16      q12, q10, q11         @ u[i] + 2*u[i+1] + u[i+2]
    vqrshrun.s16  d4, q10, #1           @ d4 = a0 .. a7
    vqrshrun.s16  d5, q12, #2           @ d5 = f0 .. f6 (lane 7 is not used)

    vext.8        d6, d4, d4, #1        @ d6  = a1 a2 a3 a4 ...
    vext.8        d7, d5, d5, #1        @ d7  = f1 f2 f3 f4 ...
    vext.8        d16, d4, d4, #2       @ d16 = a2 a3 a4 a5 ...
    vext.8        d17, d5, d5, #2       @ d17 = f2 f3 f4 f5 ...

    vst1.32       {d6[0]}, [r1], r3     @ row 0, then pu1_dst += dst_strd
    vst1.32       {d7[0]}, [r1], r3     @ row 1
    vst1.32       {d16[0]}, [r1], r3    @ row 2
    vst1.32       {d17[0]}, [r1]        @ row 3

    bx            lr                    @ nothing was stacked, return directly
750
751
752
753
754
755
756
757@**
758@*******************************************************************************
759@*
760@*ih264_intra_pred_luma_4x4_mode_horz_u
761@*
762@* @brief
763@*     Perform Intra prediction for  luma_4x4 mode:Horizontal_Up
764@*
765@* @par Description:
766@*      Perform Intra prediction for  luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
767@*
768@* @param[in] pu1_src
769@*  UWORD8 pointer to the source
770@*
771@* @param[out] pu1_dst
772@*  UWORD8 pointer to the destination
773@*
774@* @param[in] src_strd
775@*  integer source stride
776@*
777@* @param[in] dst_strd
778@*  integer destination stride
779@*
780@* @param[in] ui_neighboravailability
781@*  availability of neighbouring pixels
782@*
783@* @returns
784@*
785@* @remarks
786@*  None
787@*
788@*******************************************************************************
789@void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
790@                                           UWORD8 *pu1_dst,
791@                                           WORD32 src_strd,
792@                                           WORD32 dst_strd,
793@                                           WORD32 ui_neighboravailability)
794
795@**************Variables Vs Registers*****************************************
796@   r0 => *pu1_src
797@   r1 => *pu1_dst
798@   r2 =>  src_strd
799@   r3 =>  dst_strd
@   stack =>  ui_neighboravailability (fifth argument, passed on the stack; this function never loads it)
801
802
803    .global ih264_intra_pred_luma_4x4_mode_horz_u_a9q
804
ih264_intra_pred_luma_4x4_mode_horz_u_a9q:

    @ With s[i] = pu1_src[i] this computes
    @     a0 = (s0 + s1 + 1) >> 1        f0 = (s0 + 2*s1 + s2 + 2) >> 2
    @     a1 = (s1 + s2 + 1) >> 1        f1 = (s1 + 2*s2 + s3 + 2) >> 2
    @     a2 = (s2 + s3 + 1) >> 1        f7 = (s1 + 3*s0 + 2) >> 2
    @ and writes the 4x4 block as
    @     row 0 = a2 f1 a1 f0
    @     row 1 = a1 f0 a0 f7
    @     row 2 = a0 f7 s0 s0
    @     row 3 = s0 s0 s0 s0
    @
    @   r0 = pu1_src, r1 = pu1_dst, r3 = dst_strd
    @   r2 (src_strd) is not read as an input; r2 is used as scratch.
    @   The stacked ui_neighboravailability is not read.
    @
    @ Eight bytes (pu1_src[0..7]) are loaded although only s0..s3 reach the
    @ output. Only AAPCS caller-saved registers are modified (r1, r2, d0-d7,
    @ q10-q12), so this leaf routine needs no stack frame.

    sub           r2, r3, #2            @ r2 = dst_strd - 2: step to the next row after a 2-byte advance
    vld1.8        {d0}, [r0]            @ d0 = s0 s1 s2 s3 s4 s5 s6 s7
    vext.8        d1, d0, d0, #1        @ d1 = s1 s2 s3 s4 s5 s6 s7 s0 (must precede the lane load below)
    vld1.8        {d0[7]}, [r0]         @ d0 = s0 s1 s2 s3 s4 s5 s6 s0
    vext.8        d2, d1, d1, #1        @ d2 = s2 s3 s4 s5 s6 s7 s0 s1
    vdup.8        d7, d0[0]             @ d7 = s0 in every lane
    vaddl.u8      q10, d0, d1           @ d0[i] + d1[i]
    vaddl.u8      q11, d1, d2           @ d1[i] + d2[i]
    vadd.u16      q12, q10, q11         @ d0[i] + 2*d1[i] + d2[i]
    vqrshrun.s16  d4, q10, #1           @ d4 = a0 a1 a2 ...
    vqrshrun.s16  d5, q12, #2           @ d5 = f0 f1 f2 ... ; lane 7 = f7 = (s0 + 2*s0 + s1 + 2) >> 2
    vext.8        d6, d5, d4, #1        @ d6 = d5[1..7], a0  (d6[0] = f1)

    vst1.8        {d4[2]}, [r1]!        @ row 0 = a2 ...
    vst1.8        {d6[0]}, [r1]!        @         ... f1 ...
    vtrn.8        d6, d5                @ d5 bytes 6,7 become a0, f7
    vtrn.8        d4, d6                @ d6 bytes 0,1 become a1, f0
    vst1.16       {d6[0]}, [r1], r2     @         ... a1 f0
    vst1.16       {d6[0]}, [r1]!        @ row 1 = a1 f0 ...
    vst1.16       {d5[3]}, [r1], r2     @         ... a0 f7
    vst1.16       {d5[3]}, [r1]!        @ row 2 = a0 f7 ...
    vst1.16       {d7[3]}, [r1], r2     @         ... s0 s0
    vst1.32       {d7[0]}, [r1]         @ row 3 = s0 s0 s0 s0

    bx            lr                    @ nothing was stacked, return directly
836
837
838