1//******************************************************************************
2//*
3//* Copyright (C) 2015 The Android Open Source Project
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************
18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20///**
21//******************************************************************************
22//* @file
23//*  ih264_intra_pred_luma_4x4_av8.s
24//*
25//* @brief
26//*  Contains function definitions for intra 4x4 Luma prediction .
27//*
28//* @author
29//*  Ittiam
30//*
31//* @par List of Functions:
32//*
33//*  -ih264_intra_pred_luma_4x4_mode_vert_av8
34//*  -ih264_intra_pred_luma_4x4_mode_horz_av8
35//*  -ih264_intra_pred_luma_4x4_mode_dc_av8
36//*  -ih264_intra_pred_luma_4x4_mode_diag_dl_av8
37//*  -ih264_intra_pred_luma_4x4_mode_diag_dr_av8
38//*  -ih264_intra_pred_luma_4x4_mode_vert_r_av8
39//*  -ih264_intra_pred_luma_4x4_mode_horz_d_av8
40//*  -ih264_intra_pred_luma_4x4_mode_vert_l_av8
41//*  -ih264_intra_pred_luma_4x4_mode_horz_u_av8
42//*
43//* @remarks
44//*  None
45//*
46//*******************************************************************************
47//*/
48
49///* All the functions here are replicated from ih264_intra_pred_filters.c
50//
51
52///**
53///**
54///**
55//
56
57.text
58.p2align 2
59.include "ih264_neon_macros.s"
60
61
62
63
64///**
65//*******************************************************************************
66//*
67//*ih264_intra_pred_luma_4x4_mode_vert
68//*
69//* @brief
70//*  Perform Intra prediction for  luma_4x4 mode:vertical
71//*
72//* @par Description:
73//* Perform Intra prediction for  luma_4x4 mode:vertical ,described in sec 8.3.1.2.1
74//*
75//* @param[in] pu1_src
76//*  UWORD8 pointer to the source
77//*
78//* @param[out] pu1_dst
79//*  UWORD8 pointer to the destination
80//*
81//* @param[in] src_strd
82//*  integer source stride
83//*
84//* @param[in] dst_strd
85//*  integer destination stride
86//*
87//* @param[in] ui_neighboravailability
88//* availability of neighbouring pixels(Not used in this function)
89//*
90//* @returns
91//*
92//* @remarks
93//*  None
94//*
95//*******************************************************************************
96//void ih264_intra_pred_luma_4x4_mode_vert(UWORD8 *pu1_src,
97//                                        UWORD8 *pu1_dst,
98//                                        WORD32 src_strd,
99//                                        WORD32 dst_strd,
100//                                        WORD32 ui_neighboravailability)
101
102//**************Variables Vs Registers*****************************************
103//    x0 => *pu1_src
104//    x1 => *pu1_dst
105//    x2 =>  src_strd
106//    x3 =>  dst_strd
107//   x4 =>  ui_neighboravailability
108
109    .global ih264_intra_pred_luma_4x4_mode_vert_av8
110
111ih264_intra_pred_luma_4x4_mode_vert_av8:
112
113    push_v_regs
114
115    add       x0, x0, #5
116
117    ld1       {v0.s}[0], [x0]
118    st1       {v0.s}[0], [x1], x3
119    st1       {v0.s}[0], [x1], x3
120    st1       {v0.s}[0], [x1], x3
121    st1       {v0.s}[0], [x1], x3
122
123    pop_v_regs
124    ret
125
126
127
128
129
130///******************************************************************************
131
132
133///**
134//*******************************************************************************
135//*
136//*ih264_intra_pred_luma_4x4_mode_horz
137//*
138//* @brief
139//*  Perform Intra prediction for  luma_4x4 mode:horizontal
140//*
141//* @par Description:
142//*  Perform Intra prediction for  luma_4x4 mode:horizontal ,described in sec 8.3.1.2.2
143//*
144//* @param[in] pu1_src
145//*  UWORD8 pointer to the source
146//*
147//* @param[out] pu1_dst
148//*  UWORD8 pointer to the destination
149//*
150//* @param[in] src_strd
151//*  integer source stride
152//*
153//* @param[in] dst_strd
154//*  integer destination stride
155//*
156//* @param[in] ui_neighboravailability
157//* availability of neighbouring pixels(Not used in this function)
158//*
159//* @returns
160//*
161//* @remarks
162//*  None
163//*
164//*******************************************************************************
165//*/
166//void ih264_intra_pred_luma_4x4_mode_horz(UWORD8 *pu1_src,
167//                                         UWORD8 *pu1_dst,
168//                                         WORD32 src_strd,
169//                                         WORD32 dst_strd,
170//                                         WORD32 ui_neighboravailability)
171//**************Variables Vs Registers*****************************************
172//    x0 => *pu1_src
173//    x1 => *pu1_dst
174//    x2 =>  src_strd
175//    x3 =>  dst_strd
176//   x4 =>  ui_neighboravailability
177
178
179
180    .global ih264_intra_pred_luma_4x4_mode_horz_av8
181
182ih264_intra_pred_luma_4x4_mode_horz_av8:
183
184    push_v_regs
185
186    ld1       {v1.s}[0], [x0]
187    dup       v0.8b, v1.b[3]
188    dup       v2.8b, v1.b[2]
189    st1       {v0.s}[0], [x1], x3
190    dup       v3.8b, v1.b[1]
191    st1       {v2.s}[0], [x1], x3
192    dup       v4.8b, v1.b[0]
193    st1       {v3.s}[0], [x1], x3
194    st1       {v4.s}[0], [x1], x3
195
196    pop_v_regs
197    ret
198
199
200
201
202
203
204
205///******************************************************************************
206
207
208///**
209//*******************************************************************************
210//*
211//*ih264_intra_pred_luma_4x4_mode_dc
212//*
213//* @brief
214//*  Perform Intra prediction for  luma_4x4 mode:DC
215//*
216//* @par Description:
217//*  Perform Intra prediction for  luma_4x4 mode:DC ,described in sec 8.3.1.2.3
218//*
219//* @param[in] pu1_src
220//*  UWORD8 pointer to the source
221//*
222//* @param[out] pu1_dst
223//*  UWORD8 pointer to the destination
224//*
225//* @param[in] src_strd
226//*  integer source stride
227//*
228//* @param[in] dst_strd
229//*  integer destination stride
230//*
231//* @param[in] ui_neighboravailability
232//*  availability of neighbouring pixels
233//*
234//* @returns
235//*
236//* @remarks
237//*  None
238//*
239//*******************************************************************************/
240//void ih264_intra_pred_luma_4x4_mode_dc(UWORD8 *pu1_src,
241//                                       UWORD8 *pu1_dst,
242//                                       WORD32 src_strd,
243//                                       WORD32 dst_strd,
244//                                       WORD32 ui_neighboravailability)
245
246//**************Variables Vs Registers*****************************************
247//    x0 => *pu1_src
248//    x1 => *pu1_dst
249//    x2 =>  src_strd
250//    x3 =>  dst_strd
251//   x4 =>  ui_neighboravailability
252
253
254
255    .global ih264_intra_pred_luma_4x4_mode_dc_av8
256
257ih264_intra_pred_luma_4x4_mode_dc_av8:
258
259
260
261
262    push_v_regs
263    stp       x19, x20, [sp, #-16]!
264
265    ands      x5, x4, #0x01
266    beq       top_available             //LEFT NOT AVAILABLE
267
268    add       x10, x0, #3
269    mov       x2, #-1
270    ldrb      w5, [x10], #-1
271    sxtw      x5, w5
272    ldrb      w6, [x10], #-1
273    sxtw      x6, w6
274    ldrb      w7, [x10], #-1
275    sxtw      x7, w7
276    add       x5, x5, x6
277    ldrb      w8, [x10], #-1
278    sxtw      x8, w8
279    add       x5, x5, x7
280    ands      x11, x4, #0x04            // CHECKING IF TOP_AVAILABLE  ELSE BRANCHING TO ONLY LEFT AVAILABLE
281    add       x5, x5, x8
282    beq       left_available
283    add       x10, x0, #5
284    //    BOTH LEFT AND TOP AVAILABLE
285    ldrb      w6, [x10], #1
286    sxtw      x6, w6
287    ldrb      w7, [x10], #1
288    sxtw      x7, w7
289    add       x5, x5, x6
290    ldrb      w8, [x10], #1
291    sxtw      x8, w8
292    add       x5, x5, x7
293    ldrb      w9, [x10], #1
294    sxtw      x9, w9
295    add       x5, x5, x8
296    add       x5, x5, x9
297    add       x5, x5, #4
298    lsr       x5, x5, #3
299    dup       v0.8b, w5
300    st1       {v0.s}[0], [x1], x3
301    st1       {v0.s}[0], [x1], x3
302    st1       {v0.s}[0], [x1], x3
303    st1       {v0.s}[0], [x1], x3
304    b         end_func
305
306top_available: // ONLT TOP AVAILABLE
307    ands      x11, x4, #0x04            // CHECKING TOP AVAILABILTY  OR ELSE BRANCH TO NONE AVAILABLE
308    beq       none_available
309
310    add       x10, x0, #5
311    ldrb      w6, [x10], #1
312    sxtw      x6, w6
313    ldrb      w7, [x10], #1
314    sxtw      x7, w7
315    ldrb      w8, [x10], #1
316    sxtw      x8, w8
317    add       x5, x6, x7
318    ldrb      w9, [x10], #1
319    sxtw      x9, w9
320    add       x5, x5, x8
321    add       x5, x5, x9
322    add       x5, x5, #2
323    lsr       x5, x5, #2
324    dup       v0.8b, w5
325    st1       {v0.s}[0], [x1], x3
326    st1       {v0.s}[0], [x1], x3
327    st1       {v0.s}[0], [x1], x3
328    st1       {v0.s}[0], [x1], x3
329    b         end_func
330
331left_available: //ONLY LEFT AVAILABLE
332    add       x5, x5, #2
333    lsr       x5, x5, #2
334    dup       v0.8b, w5
335    st1       {v0.s}[0], [x1], x3
336    st1       {v0.s}[0], [x1], x3
337    st1       {v0.s}[0], [x1], x3
338    st1       {v0.s}[0], [x1], x3
339    b         end_func
340
341none_available:                         //NONE AVAILABLE
342    mov       x5, #128
343    dup       v0.8b, w5
344    st1       {v0.s}[0], [x1], x3
345    st1       {v0.s}[0], [x1], x3
346    st1       {v0.s}[0], [x1], x3
347    st1       {v0.s}[0], [x1], x3
348    b         end_func
349
350
351end_func:
352
353    ldp       x19, x20, [sp], #16
354    pop_v_regs
355    ret
356
357
358
359
360
361
362
363///**
364//*******************************************************************************
365//*
366//*ih264_intra_pred_luma_4x4_mode_diag_dl
367//*
368//* @brief
369//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left
370//*
371//* @par Description:
372//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Left ,described in sec 8.3.1.2.4
373//*
374//* @param[in] pu1_src
375//*  UWORD8 pointer to the source
376//*
377//* @param[out] pu1_dst
378//*  UWORD8 pointer to the destination
379//*
380//* @param[in] src_strd
381//*  integer source stride
382//*
383//* @param[in] dst_strd
384//*  integer destination stride
385//*
386//* @param[in] ui_neighboravailability
387//*  availability of neighbouring pixels
388//*
389//* @returns
390//*
391//* @remarks
392//*  None
393//*
394//*******************************************************************************/
395//void ih264_intra_pred_luma_4x4_mode_diag_dl(UWORD8 *pu1_src,
396//                                            UWORD8 *pu1_dst,
397//                                            WORD32 src_strd,
398//                                              WORD32 dst_strd,
399//                                              WORD32 ui_neighboravailability)
400
401//**************Variables Vs Registers*****************************************
402//    x0 => *pu1_src
403//    x1 => *pu1_dst
404//    x2 =>  src_strd
405//    x3 =>  dst_strd
406//   x4 =>  ui_neighboravailability
407
408
409    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8
410
411ih264_intra_pred_luma_4x4_mode_diag_dl_av8:
412
413
414    push_v_regs
415    stp       x19, x20, [sp, #-16]!
416
417    add       x0, x0, #5
418    sub       x5, x3, #2
419    add       x6, x0, #7
420    ld1       {v0.8b}, [x0]
421    ext       v1.8b, v0.8b , v0.8b , #1
422    ext       v2.8b, v0.8b , v0.8b , #2
423    ld1       {v2.b}[6], [x6]
424    uaddl     v20.8h, v0.8b, v1.8b
425    uaddl     v22.8h, v1.8b, v2.8b
426    add       v24.8h, v20.8h , v22.8h
427    sqrshrun  v3.8b, v24.8h, #2
428    st1       {v3.s}[0], [x1], x3
429    ext       v4.8b, v3.8b , v3.8b , #1
430    st1       {v4.s}[0], [x1], x3
431    st1       {v3.h}[1], [x1], #2
432    st1       {v3.h}[2], [x1], x5
433    st1       {v4.h}[1], [x1], #2
434    st1       {v4.h}[2], [x1]
435
436end_func_diag_dl:
437
438    ldp       x19, x20, [sp], #16
439    pop_v_regs
440    ret
441
442
443
444
445
446
447
448
449
450///**
451//*******************************************************************************
452//*
453//*ih264_intra_pred_luma_4x4_mode_diag_dr
454//*
455//* @brief
456//* Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right
457//*
458//* @par Description:
459//*  Perform Intra prediction for  luma_4x4 mode:Diagonal_Down_Right ,described in sec 8.3.1.2.5
460//*
461//* @param[in] pu1_src
462//*  UWORD8 pointer to the source
463//*
464//* @param[out] pu1_dst
465//*  UWORD8 pointer to the destination
466//*
467//* @param[in] src_strd
468//*  integer source stride
469//*
470//* @param[in] dst_strd
471//*  integer destination stride
472//*
473//* @param[in] ui_neighboravailability
474//*  availability of neighbouring pixels
475//*
476//* @returns
477//*
478//* @remarks
479//*  None
480//*
481//*******************************************************************************/
482//void ih264_intra_pred_luma_4x4_mode_diag_dr(UWORD8 *pu1_src,
483//                                            UWORD8 *pu1_dst,
484//                                            WORD32 src_strd,
485//                                              WORD32 dst_strd,
486//                                              WORD32 ui_neighboravailability)
487
488//**************Variables Vs Registers*****************************************
489//    x0 => *pu1_src
490//    x1 => *pu1_dst
491//    x2 =>  src_strd
492//    x3 =>  dst_strd
493//   x4 =>  ui_neighboravailability
494
495
496    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8
497
498ih264_intra_pred_luma_4x4_mode_diag_dr_av8:
499
500    push_v_regs
501    stp       x19, x20, [sp, #-16]!
502
503
504    ld1       {v0.8b}, [x0]
505    add       x0, x0, #1
506    ld1       {v1.8b}, [x0]
507    ext       v2.8b, v1.8b , v1.8b , #1
508    uaddl     v20.8h, v0.8b, v1.8b
509    uaddl     v22.8h, v1.8b, v2.8b
510    add       v24.8h, v20.8h , v22.8h
511    sqrshrun  v3.8b, v24.8h, #2
512
513    ext       v4.8b, v3.8b , v3.8b , #1
514    sub       x5, x3, #2
515    st1       {v4.h}[1], [x1], #2
516    st1       {v4.h}[2], [x1], x5
517    st1       {v3.h}[1], [x1], #2
518    st1       {v3.h}[2], [x1], x5
519    st1       {v4.s}[0], [x1], x3
520    st1       {v3.s}[0], [x1], x3
521
522end_func_diag_dr:
523    ldp       x19, x20, [sp], #16
524    pop_v_regs
525    ret
526
527
528
529
530
531
532
533///**
534//*******************************************************************************
535//*
536//*ih264_intra_pred_luma_4x4_mode_vert_r
537//*
538//* @brief
539//* Perform Intra prediction for  luma_4x4 mode:Vertical_Right
540//*
541//* @par Description:
542//*   Perform Intra prediction for  luma_4x4 mode:Vertical_Right ,described in sec 8.3.1.2.6
543//*
544//* @param[in] pu1_src
545//*  UWORD8 pointer to the source
546//*
547//* @param[out] pu1_dst
548//*  UWORD8 pointer to the destination
549//*
550//* @param[in] src_strd
551//*  integer source stride
552//*
553//* @param[in] dst_strd
554//*  integer destination stride
555//*
556//* @param[in] ui_neighboravailability
557//*  availability of neighbouring pixels
558//*
559//* @returns
560//*
561//* @remarks
562//*  None
563//*
564//*******************************************************************************/
565//void ih264_intra_pred_luma_4x4_mode_vert_r(UWORD8 *pu1_src,
566//                                            UWORD8 *pu1_dst,
567//                                            WORD32 src_strd,
568//                                              WORD32 dst_strd,
569//                                              WORD32 ui_neighboravailability)
570
571//**************Variables Vs Registers*****************************************
572//    x0 => *pu1_src
573//    x1 => *pu1_dst
574//    x2 =>  src_strd
575//    x3 =>  dst_strd
576//   x4 =>  ui_neighboravailability
577
578
579    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8
580
581ih264_intra_pred_luma_4x4_mode_vert_r_av8:
582
583    push_v_regs
584    stp       x19, x20, [sp, #-16]!
585
586
587    ld1       {v0.8b}, [x0]
588    add       x0, x0, #1
589    ld1       {v1.8b}, [x0]
590    ext       v2.8b, v1.8b , v1.8b , #1
591    uaddl     v20.8h, v0.8b, v1.8b
592    uaddl     v22.8h, v1.8b, v2.8b
593    add       v24.8h, v20.8h , v22.8h
594    sqrshrun  v4.8b, v20.8h, #1
595    sqrshrun  v3.8b, v24.8h, #2
596    sub       x5, x3, #2
597    ext       v5.8b, v3.8b , v3.8b , #3
598    st1       {v4.s}[1], [x1], x3
599    st1       {v5.s}[0], [x1], x3
600    sub       x8, x3, #3
601    st1       {v3.b}[2], [x1], #1
602    st1       {v4.h}[2], [x1], #2
603    st1       {v4.b}[6], [x1], x8
604    st1       {v3.b}[1], [x1], #1
605    st1       {v5.h}[0], [x1], #2
606    st1       {v5.b}[2], [x1]
607
608
609end_func_vert_r:
610    ldp       x19, x20, [sp], #16
611    pop_v_regs
612    ret
613
614
615
616
617
618///**
619//*******************************************************************************
620//*
621//*ih264_intra_pred_luma_4x4_mode_horz_d
622//*
623//* @brief
624//* Perform Intra prediction for  luma_4x4 mode:Horizontal_Down
625//*
626//* @par Description:
627//*   Perform Intra prediction for  luma_4x4 mode:Horizontal_Down ,described in sec 8.3.1.2.7
628//*
629//* @param[in] pu1_src
630//*  UWORD8 pointer to the source
631//*
632//* @param[out] pu1_dst
633//*  UWORD8 pointer to the destination
634//*
635//* @param[in] src_strd
636//*  integer source stride
637//*
638//* @param[in] dst_strd
639//*  integer destination stride
640//*
641//* @param[in] ui_neighboravailability
642//*  availability of neighbouring pixels
643//*
644//* @returns
645//*
646//* @remarks
647//*  None
648//*
649//*******************************************************************************/
650//void ih264_intra_pred_luma_4x4_mode_horz_d(UWORD8 *pu1_src,
651//                                            UWORD8 *pu1_dst,
652//                                            WORD32 src_strd,
653//                                              WORD32 dst_strd,
654//                                              WORD32 ui_neighboravailability)
655
656//**************Variables Vs Registers*****************************************
657//    x0 => *pu1_src
658//    x1 => *pu1_dst
659//    x2 =>  src_strd
660//    x3 =>  dst_strd
661//   x4 =>  ui_neighboravailability
662
663
664    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8
665
666ih264_intra_pred_luma_4x4_mode_horz_d_av8:
667
668    push_v_regs
669    stp       x19, x20, [sp, #-16]!
670
671    ld1       {v0.8b}, [x0]
672    add       x0, x0, #1
673    ld1       {v1.8b}, [x0]
674    ext       v2.8b, v1.8b , v0.8b , #1
675    uaddl     v20.8h, v0.8b, v1.8b
676    uaddl     v22.8h, v1.8b, v2.8b
677    add       v24.8h, v20.8h , v22.8h
678    sqrshrun  v4.8b, v20.8h, #1
679    sqrshrun  v5.8b, v24.8h, #2
680    sub       x5, x3, #2
681    mov       v6.8b, v5.8b
682    trn1      v10.8b, v4.8b, v5.8b
683    trn2      v5.8b, v4.8b, v5.8b       //
684    mov       v4.8b, v10.8b
685    st1       {v5.h}[1], [x1], #2
686    st1       {v6.h}[2], [x1], x5
687    st1       {v4.h}[1], [x1], #2
688    st1       {v5.h}[1], [x1], x5
689    st1       {v5.h}[0], [x1], #2
690    st1       {v4.h}[1], [x1], x5
691    st1       {v4.h}[0], [x1], #2
692    st1       {v5.h}[0], [x1], x5
693
694end_func_horz_d:
695    ldp       x19, x20, [sp], #16
696    pop_v_regs
697    ret
698
699
700
701
702
703
704
705///**
706//*******************************************************************************
707//*
708//*ih264_intra_pred_luma_4x4_mode_vert_l
709//*
710//* @brief
711//*  Perform Intra prediction for  luma_4x4 mode:Vertical_Left
712//*
713//* @par Description:
714//*   Perform Intra prediction for  luma_4x4 mode:Vertical_Left ,described in sec 8.3.1.2.8
715//*
716//* @param[in] pu1_src
717//*  UWORD8 pointer to the source
718//*
719//* @param[out] pu1_dst
720//*  UWORD8 pointer to the destination
721//*
722//* @param[in] src_strd
723//*  integer source stride
724//*
725//* @param[in] dst_strd
726//*  integer destination stride
727//*
728//* @param[in] ui_neighboravailability
729//*  availability of neighbouring pixels
730//*
731//* @returns
732//*
733//* @remarks
734//*  None
735//*
736//*******************************************************************************/
737//void ih264_intra_pred_luma_4x4_mode_vert_l(UWORD8 *pu1_src,
738//                                            UWORD8 *pu1_dst,
739//                                            WORD32 src_strd,
740//                                              WORD32 dst_strd,
741//                                              WORD32 ui_neighboravailability)
742
743//**************Variables Vs Registers*****************************************
744//    x0 => *pu1_src
745//    x1 => *pu1_dst
746//    x2 =>  src_strd
747//    x3 =>  dst_strd
748//   x4 =>  ui_neighboravailability
749
750
751    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8
752
753ih264_intra_pred_luma_4x4_mode_vert_l_av8:
754
755    push_v_regs
756    stp       x19, x20, [sp, #-16]!
757    add       x0, x0, #4
758    ld1       {v0.8b}, [x0]
759    add       x0, x0, #1
760    ld1       {v1.8b}, [x0]
761    ext       v2.8b, v1.8b , v0.8b , #1
762    uaddl     v20.8h, v0.8b, v1.8b
763    uaddl     v22.8h, v1.8b, v2.8b
764    add       v24.8h, v20.8h , v22.8h
765    sqrshrun  v4.8b, v20.8h, #1
766    sqrshrun  v5.8b, v24.8h, #2
767    ext       v6.8b, v4.8b , v4.8b , #1
768    ext       v7.8b, v5.8b , v5.8b , #1
769    st1       {v6.s}[0], [x1], x3
770    ext       v8.8b, v4.8b , v4.8b , #2
771    ext       v9.8b, v5.8b , v5.8b , #2
772    st1       {v7.s}[0], [x1], x3
773    st1       {v8.s}[0], [x1], x3
774    st1       {v9.s}[0], [x1], x3
775
776end_func_vert_l:
777    ldp       x19, x20, [sp], #16
778    pop_v_regs
779    ret
780
781
782
783
784
785
786
787///**
788//*******************************************************************************
789//*
790//*ih264_intra_pred_luma_4x4_mode_horz_u
791//*
792//* @brief
793//*     Perform Intra prediction for  luma_4x4 mode:Horizontal_Up
794//*
795//* @par Description:
796//*      Perform Intra prediction for  luma_4x4 mode:Horizontal_Up ,described in sec 8.3.1.2.9
797//*
798//* @param[in] pu1_src
799//*  UWORD8 pointer to the source
800//*
801//* @param[out] pu1_dst
802//*  UWORD8 pointer to the destination
803//*
804//* @param[in] src_strd
805//*  integer source stride
806//*
807//* @param[in] dst_strd
808//*  integer destination stride
809//*
810//* @param[in] ui_neighboravailability
811//*  availability of neighbouring pixels
812//*
813//* @returns
814//*
815//* @remarks
816//*  None
817//*
818//*******************************************************************************/
819//void ih264_intra_pred_luma_4x4_mode_horz_u(UWORD8 *pu1_src,
820//                                           UWORD8 *pu1_dst,
821//                                           WORD32 src_strd,
822//                                             WORD32 dst_strd,
823//                                             WORD32 ui_neighboravailability)
824
825//**************Variables Vs Registers*****************************************
826//    x0 => *pu1_src
827//    x1 => *pu1_dst
828//    x2 =>  src_strd
829//    x3 =>  dst_strd
830//   x4 =>  ui_neighboravailability
831
832
833    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8
834
835ih264_intra_pred_luma_4x4_mode_horz_u_av8:
836
837    push_v_regs
838    stp       x19, x20, [sp, #-16]!
839    mov       x10, x0
840    ld1       {v0.8b}, [x0]
841    ldrb      w9, [x0], #1
842    sxtw      x9, w9
843    ext       v1.8b, v0.8b , v0.8b , #1
844    ld1       {v0.b}[7], [x10]
845    ext       v2.8b, v1.8b , v1.8b , #1
846    uaddl     v20.8h, v0.8b, v1.8b
847    uaddl     v22.8h, v1.8b, v2.8b
848    add       v24.8h, v20.8h , v22.8h
849    sqrshrun  v4.8b, v20.8h, #1
850    sqrshrun  v5.8b, v24.8h, #2
851    mov       v6.8b, v4.8b
852    ext       v6.8b, v5.8b , v4.8b , #1
853    st1       {v4.b}[2], [x1], #1
854    st1       {v6.b}[0], [x1], #1
855    trn1      v10.8b, v6.8b, v5.8b
856    trn2      v5.8b, v6.8b, v5.8b       //
857    mov       v6.8b , v10.8b
858    sub       x5, x3, #2
859    trn1      v10.8b, v4.8b, v6.8b
860    trn2      v6.8b, v4.8b, v6.8b       //
861    mov       v4.8b , v10.8b
862    dup       v7.8b, w9
863    st1       {v6.h}[0], [x1], x5
864    st1       {v6.h}[0], [x1], #2
865    st1       {v5.h}[3], [x1], x5
866    st1       {v5.h}[3], [x1], #2
867    st1       {v7.h}[3], [x1], x5
868    st1       {v7.s}[0], [x1], x3
869
870end_func_horz_u:
871    ldp       x19, x20, [sp], #16
872    pop_v_regs
873    ret
874
875
876
877