1//******************************************************************************
2//*
3//* Copyright (C) 2015 The Android Open Source Project
4//*
5//* Licensed under the Apache License, Version 2.0 (the "License");
6//* you may not use this file except in compliance with the License.
7//* You may obtain a copy of the License at:
8//*
9//* http://www.apache.org/licenses/LICENSE-2.0
10//*
11//* Unless required by applicable law or agreed to in writing, software
12//* distributed under the License is distributed on an "AS IS" BASIS,
13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14//* See the License for the specific language governing permissions and
15//* limitations under the License.
16//*
17//*****************************************************************************
18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20///**
21//******************************************************************************
22//* @file
23//*  ih264_intra_pred_luma_8x8_av8.s
24//*
25//* @brief
26//*  Contains function definitions for intra 8x8 Luma prediction .
27//*
28//* @author
29//*  Ittiam
30//*
31//* @par List of Functions:
32//*
33//*  -ih264_intra_pred_luma_8x8_mode_vert_av8
34//*  -ih264_intra_pred_luma_8x8_mode_horz_av8
35//*  -ih264_intra_pred_luma_8x8_mode_dc_av8
36//*  -ih264_intra_pred_luma_8x8_mode_diag_dl_av8
37//*  -ih264_intra_pred_luma_8x8_mode_diag_dr_av8
38//*  -ih264_intra_pred_luma_8x8_mode_vert_r_av8
39//*  -ih264_intra_pred_luma_8x8_mode_horz_d_av8
40//*  -ih264_intra_pred_luma_8x8_mode_vert_l_av8
41//*  -ih264_intra_pred_luma_8x8_mode_horz_u_av8
42//*
43//* @remarks
44//*  None
45//*
46//*******************************************************************************
47//*/
48
49///* All the functions here are replicated from ih264_intra_pred_filters.c
50//
51
52///**
53///**
54///**
55
56.text
57.p2align 2
58.include "ih264_neon_macros.s"
59
60.extern ih264_gai1_intrapred_luma_8x8_horz_u
61
62
63
64///**
65//*******************************************************************************
66//*
67//*ih264_intra_pred_luma_8x8_mode_vert
68//*
69//* @brief
70//*   Perform Intra prediction for  luma_8x8 mode:vertical
71//*
72//* @par Description:
73//* Perform Intra prediction for  luma_8x8 mode:vertical ,described in sec 8.3.2.2.2
74//*
75//* @param[in] pu1_src
76//*  UWORD8 pointer to the source
77//*
78//* @param[out] pu1_dst
79//*  UWORD8 pointer to the destination
80//*
81//* @param[in] src_strd
82//*  integer source stride
83//*
84//* @param[in] dst_strd
85//*  integer destination stride
86//*
87//* @param[in] ui_neighboravailability
88//* availability of neighbouring pixels(Not used in this function)
89//*
90//* @returns
91//*
92//* @remarks
93//*  None
94//*
95//*******************************************************************************
96//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src,
97//                                        UWORD8 *pu1_dst,
98//                                        WORD32 src_strd,
99//                                        WORD32 dst_strd,
100//                                        WORD32 ui_neighboravailability)
101
102//**************Variables Vs Registers*****************************************
103//    x0 => *pu1_src
104//    x1 => *pu1_dst
105//    w2 =>  src_strd
106//    w3 =>  dst_strd
107//    w4 =>  ui_neighboravailability
108
109
110    .global ih264_intra_pred_luma_8x8_mode_vert_av8
111
112ih264_intra_pred_luma_8x8_mode_vert_av8:
113
114    // STMFD sp!, {x4-x12, x14}          //store register values to stack
115    push_v_regs
116    //stp x19, x20,[sp,#-16]!
117    sxtw      x3, w3
118
119    add       x0, x0, #9
120    ld1       {v0.8b}, [x0]
121
122    st1       {v0.8b}, [x1], x3
123    st1       {v0.8b}, [x1], x3
124    st1       {v0.8b}, [x1], x3
125    st1       {v0.8b}, [x1], x3
126    st1       {v0.8b}, [x1], x3
127    st1       {v0.8b}, [x1], x3
128    st1       {v0.8b}, [x1], x3
129    st1       {v0.8b}, [x1], x3
130
131    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
132    //ldp x19, x20,[sp],#16
133    pop_v_regs
134    ret
135
136
137
138
139
140///******************************************************************************
141
142
143///**
144//*******************************************************************************
145//*
146//*ih264_intra_pred_luma_8x8_mode_horz
147//*
148//* @brief
149//*  Perform Intra prediction for  luma_8x8 mode:horizontal
150//*
151//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode: horizontal, described in sec 8.3.2.2.3
153//*
154//* @param[in] pu1_src
155//*  UWORD8 pointer to the source
156//*
157//* @param[out] pu1_dst
158//*  UWORD8 pointer to the destination
159//*
160//* @param[in] src_strd
161//*  integer source stride
162//*
163//* @param[in] dst_strd
164//*  integer destination stride
165//*
166//* @param[in] ui_neighboravailability
167//* availability of neighbouring pixels(Not used in this function)
168//*
169//* @returns
170//*
171//* @remarks
172//*  None
173//*
174//*******************************************************************************
175//*/
176//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
177//                                         UWORD8 *pu1_dst,
178//                                         WORD32 src_strd,
179//                                         WORD32 dst_strd,
180//                                         WORD32 ui_neighboravailability)
181//**************Variables Vs Registers*****************************************
182//    x0 => *pu1_src
183//    x1 => *pu1_dst
184//    w2 =>  src_strd
185//    w3 =>  dst_strd
186//    w4 =>  ui_neighboravailability
187
188
189    .global ih264_intra_pred_luma_8x8_mode_horz_av8
190
191ih264_intra_pred_luma_8x8_mode_horz_av8:
192
193
194
195    // STMFD sp!, {x4-x12, x14}          //store register values to stack
196    push_v_regs
197    stp       x19, x20, [sp, #-16]!
198    sxtw      x3, w3
199    add       x0, x0, #7
200
201    ldrb      w5, [x0], #-1
202    ldrb      w6, [x0], #-1
203    dup       v0.8b, w5
204    st1       {v0.8b}, [x1], x3
205    ldrb      w7, [x0], #-1
206    dup       v1.8b, w6
207    st1       {v1.8b}, [x1], x3
208    dup       v2.8b, w7
209    ldrb      w8, [x0], #-1
210    dup       v3.8b, w8
211    st1       {v2.8b}, [x1], x3
212    ldrb      w5, [x0], #-1
213    st1       {v3.8b}, [x1], x3
214    dup       v0.8b, w5
215    ldrb      w6, [x0], #-1
216    st1       {v0.8b}, [x1], x3
217    ldrb      w7, [x0], #-1
218    dup       v1.8b, w6
219    dup       v2.8b, w7
220    st1       {v1.8b}, [x1], x3
221    ldrb      w8, [x0], #-1
222    dup       v3.8b, w8
223    st1       {v2.8b}, [x1], x3
224    st1       {v3.8b}, [x1], x3
225
226    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
227    ldp       x19, x20, [sp], #16
228    pop_v_regs
229    ret
230
231
232
233
234
235
236
237///******************************************************************************
238
239
240///**
241//*******************************************************************************
242//*
243//*ih264_intra_pred_luma_8x8_mode_dc
244//*
245//* @brief
246//*  Perform Intra prediction for  luma_8x8 mode:DC
247//*
248//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode: DC, described in sec 8.3.2.2.4
250//*
251//* @param[in] pu1_src
252//*  UWORD8 pointer to the source
253//*
254//* @param[out] pu1_dst
255//*  UWORD8 pointer to the destination
256//*
257//* @param[in] src_strd
258//*  integer source stride
259//*
260//* @param[in] dst_strd
261//*  integer destination stride
262//*
263//* @param[in] ui_neighboravailability
264//*  availability of neighbouring pixels
265//*
266//* @returns
267//*
268//* @remarks
269//*  None
270//*
271//*******************************************************************************/
272//void ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
273//                                       UWORD8 *pu1_dst,
274//                                       WORD32 src_strd,
275//                                       WORD32 dst_strd,
276//                                       WORD32 ui_neighboravailability)
277
278//**************Variables Vs Registers*****************************************
279//    x0 => *pu1_src
280//    x1 => *pu1_dst
281//    w2 =>  src_strd
282//    w3 =>  dst_strd
283//    w4 =>  ui_neighboravailability
284
285
286    .global ih264_intra_pred_luma_8x8_mode_dc_av8
287
288ih264_intra_pred_luma_8x8_mode_dc_av8:
289
290
291
292    // STMFD sp!, {x4-x12, x14}          //store register values to stack
293    push_v_regs
294    sxtw      x3, w3
295    stp       x19, x20, [sp, #-16]!
296
297    ands      w6, w4, #0x01
298    beq       top_available             //LEFT NOT AVAILABLE
299
300    add       x10, x0, #7
301    mov       x2, #-1
302    ldrb      w5, [x10], -1
303    ldrb      w6, [x10], -1
304    ldrb      w7, [x10], -1
305    add       w5, w5, w6
306    ldrb      w8, [x10], -1
307    add       w5, w5, w7
308    ldrb      w6, [x10], -1
309    add       w5, w5, w8
310    ldrb      w7, [x10], -1
311    add       w5, w5, w6
312    ldrb      w8, [x10], -1
313    add       w5, w5, w7
314    ands      w11, w4, #0x04            // CHECKING IF TOP_AVAILABLE  ELSE BRANCHING TO ONLY LEFT AVAILABLE
315    add       w5, w5, w8
316    ldrb      w6, [x10], -1
317    add       w5, w5, w6
318    beq       left_available
319    add       x10, x0, #9
320    //    BOTH LEFT AND TOP AVAILABLE
321    ld1       {v0.8b}, [x10]
322    uaddlp    v1.4h, v0.8b
323    uaddlp    v3.2s, v1.4h
324    uaddlp    v2.1d, v3.2s
325    dup       v10.8h, w5
326    dup       v8.8h, v2.h[0]
327    add       v12.8h, v8.8h , v10.8h
328    sqrshrun  v31.8b, v12.8h, #4
329    st1       {v31.8b}, [x1], x3
330    st1       {v31.8b}, [x1], x3
331    st1       {v31.8b}, [x1], x3
332    st1       {v31.8b}, [x1], x3
333    st1       {v31.8b}, [x1], x3
334    st1       {v31.8b}, [x1], x3
335    st1       {v31.8b}, [x1], x3
336    st1       {v31.8b}, [x1], x3
337    b         end_func
338
339top_available: // ONLT TOP AVAILABLE
340    ands      w11, w4, #0x04            // CHECKING TOP AVAILABILTY  OR ELSE BRANCH TO NONE AVAILABLE
341    beq       none_available
342
343    add       x10, x0, #9
344    ld1       {v10.8b}, [x10]
345    uaddlp    v14.4h, v10.8b
346    uaddlp    v13.2s, v14.4h
347    uaddlp    v12.1d, v13.2s
348    rshrn     v4.8b, v12.8h, #3
349    dup       v31.8b, v4.b[0]
350    st1       {v31.8b}, [x1], x3
351    st1       {v31.8b}, [x1], x3
352    st1       {v31.8b}, [x1], x3
353    st1       {v31.8b}, [x1], x3
354    st1       {v31.8b}, [x1], x3
355    st1       {v31.8b}, [x1], x3
356    st1       {v31.8b}, [x1], x3
357    st1       {v31.8b}, [x1], x3
358    b         end_func
359
360
361left_available: //ONLY LEFT AVAILABLE
362    add       x5, x5, #4
363    lsr       x5, x5, #3
364    dup       v0.8b, w5
365    st1       {v0.8b}, [x1], x3
366    st1       {v0.8b}, [x1], x3
367    st1       {v0.8b}, [x1], x3
368    st1       {v0.8b}, [x1], x3
369    st1       {v0.8b}, [x1], x3
370    st1       {v0.8b}, [x1], x3
371    st1       {v0.8b}, [x1], x3
372    st1       {v0.8b}, [x1], x3
373    b         end_func
374
375none_available:                         //NONE AVAILABLE
376    mov       x9, #128
377    dup       v0.8b, w9
378    st1       {v0.8b}, [x1], x3
379    st1       {v0.8b}, [x1], x3
380    st1       {v0.8b}, [x1], x3
381    st1       {v0.8b}, [x1], x3
382    st1       {v0.8b}, [x1], x3
383    st1       {v0.8b}, [x1], x3
384    st1       {v0.8b}, [x1], x3
385    st1       {v0.8b}, [x1], x3
386
387
388end_func:
389
390    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
391    ldp       x19, x20, [sp], #16
392    pop_v_regs
393    ret
394
395
396
397
398
399
400///**
401//*******************************************************************************
402//*
403//*ih264_intra_pred_luma_8x8_mode_diag_dl
404//*
405//* @brief
406//*  Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Left
407//*
408//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Left, described in sec 8.3.2.2.5
410//*
411//* @param[in] pu1_src
412//*  UWORD8 pointer to the source
413//*
414//* @param[out] pu1_dst
415//*  UWORD8 pointer to the destination
416//*
417//* @param[in] src_strd
418//*  integer source stride
419//*
420//* @param[in] dst_strd
421//*  integer destination stride
422//*
423//* @param[in] ui_neighboravailability
424//*  availability of neighbouring pixels
425//*
426//* @returns
427//*
428//* @remarks
429//*  None
430//*
431//*******************************************************************************/
432//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
433//                                            UWORD8 *pu1_dst,
434//                                            WORD32 src_strd,
435//                                              WORD32 dst_strd,
436//                                              WORD32 ui_neighboravailability)
437
438//**************Variables Vs Registers*****************************************
439//    x0 => *pu1_src
440//    x1 => *pu1_dst
441//    w2 =>  src_strd
442//    w3 =>  dst_strd
443//    w4 =>  ui_neighboravailability
444
445    .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8
446
447ih264_intra_pred_luma_8x8_mode_diag_dl_av8:
448
449    // STMFD sp!, {x4-x12, x14}          //store register values to stack
450    push_v_regs
451    stp       x19, x20, [sp, #-16]!
452    sxtw      x3, w3
453
454    add       x0, x0, #9
455    sub       x5, x3, #4
456    add       x6, x0, #15
457    ld1       { v0.16b}, [x0]
458    mov       v1.d[0], v0.d[1]
459    ext       v4.16b, v0.16b , v0.16b , #2
460    mov       v5.d[0], v4.d[1]
461    ext       v2.16b, v0.16b , v0.16b , #1
462    mov       v3.d[0], v2.d[1]
463    ld1       {v5.b}[6], [x6]
464    // q1 = q0 shifted to left once
465    // q2 = q1 shifted to left once
466    uaddl     v20.8h, v0.8b, v2.8b      //Adding for FILT121
467    uaddl     v22.8h, v1.8b, v3.8b
468    uaddl     v24.8h, v2.8b, v4.8b
469    uaddl     v26.8h, v3.8b, v5.8b
470    add       v24.8h, v20.8h , v24.8h
471    add       v26.8h, v22.8h , v26.8h
472
473    sqrshrun  v4.8b, v24.8h, #2
474    sqrshrun  v5.8b, v26.8h, #2
475    mov       v4.d[1], v5.d[0]
476    //Q2 has all FILT121 values
477    st1       {v4.8b}, [x1], x3
478    ext       v18.16b, v4.16b , v4.16b , #1
479    ext       v16.16b, v18.16b , v18.16b , #1
480    st1       {v18.8b}, [x1], x3
481    ext       v14.16b, v16.16b , v16.16b , #1
482    st1       {v16.8b}, [x1], x3
483    st1       {v14.8b}, [x1], x3
484    st1       {v4.s}[1], [x1], #4
485    st1       {v5.s}[0], [x1], x5
486    st1       {v18.s}[1], [x1], #4
487    st1       {v18.s}[2], [x1], x5
488    st1       {v16.s}[1], [x1], #4
489    st1       {v16.s}[2], [x1], x5
490    st1       {v14.s}[1], [x1], #4
491    st1       {v14.s}[2], [x1], x5
492
493
494end_func_diag_dl:
495    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
496    ldp       x19, x20, [sp], #16
497    pop_v_regs
498    ret
499
500
501
502
503///**
504//*******************************************************************************
505//*
506//*ih264_intra_pred_luma_8x8_mode_diag_dr
507//*
508//* @brief
509//* Perform Intra prediction for  luma_8x8 mode:Diagonal_Down_Right
510//*
511//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode: Diagonal_Down_Right, described in sec 8.3.2.2.6
513//*
514//* @param[in] pu1_src
515//*  UWORD8 pointer to the source
516//*
517//* @param[out] pu1_dst
518//*  UWORD8 pointer to the destination
519//*
520//* @param[in] src_strd
521//*  integer source stride
522//*
523//* @param[in] dst_strd
524//*  integer destination stride
525//*
526//* @param[in] ui_neighboravailability
527//*  availability of neighbouring pixels
528//*
529//* @returns
530//*
531//* @remarks
532//*  None
533//*
534//*******************************************************************************/
535//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
536//                                            UWORD8 *pu1_dst,
537//                                            WORD32 src_strd,
538//                                              WORD32 dst_strd,
539//                                              WORD32 ui_neighboravailability)
540
541//**************Variables Vs Registers*****************************************
542//    x0 => *pu1_src
543//    x1 => *pu1_dst
544//    w2 =>  src_strd
545//    w3 =>  dst_strd
546//    w4 =>  ui_neighboravailability
547
548
549    .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8
550
551ih264_intra_pred_luma_8x8_mode_diag_dr_av8:
552
553    // STMFD sp!, {x4-x12, x14}          //store register values to stack
554    push_v_regs
555    stp       x19, x20, [sp, #-16]!
556    sxtw      x3, w3
557
558
559    ld1       { v0.16b}, [x0]
560    mov       v1.d[0], v0.d[1]
561    add       x0, x0, #1
562    ld1       { v2.16b}, [x0]
563    mov       v3.d[0], v2.d[1]
564    ext       v4.16b, v2.16b , v2.16b , #1
565    mov       v5.d[0], v4.d[1]
566    // q1 = q0 shifted to left once
567    // q2 = q1 shifted to left once
568    uaddl     v20.8h, v0.8b, v2.8b      //Adding for FILT121
569    uaddl     v22.8h, v1.8b, v3.8b
570    uaddl     v24.8h, v2.8b, v4.8b
571    uaddl     v26.8h, v3.8b, v5.8b
572    add       v24.8h, v20.8h , v24.8h
573    add       v26.8h, v22.8h , v26.8h
574    sqrshrun  v4.8b, v24.8h, #2
575    sqrshrun  v5.8b, v26.8h, #2
576    mov       v4.d[1], v5.d[0]
577    //Q2 has all FILT121 values
578    sub       x5, x3, #4
579    ext       v18.16b, v4.16b , v4.16b , #15
580    st1       {v18.d}[1], [x1], x3
581    ext       v16.16b, v18.16b , v18.16b , #15
582    st1       {v16.d}[1], [x1], x3
583    ext       v14.16b, v16.16b , v16.16b , #15
584    st1       {v14.d}[1], [x1], x3
585    st1       {v4.s}[1], [x1], #4
586    st1       {v5.s}[0], [x1], x5
587    st1       {v18.s}[1], [x1], #4
588    st1       {v18.s}[2], [x1], x5
589    st1       {v16.s}[1], [x1], #4
590    st1       {v16.s}[2], [x1], x5
591    st1       {v14.s}[1], [x1], #4
592    st1       {v14.s}[2], [x1], x5
593    st1       {v4.8b}, [x1], x3
594
595end_func_diag_dr:
596    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
597    ldp       x19, x20, [sp], #16
598    pop_v_regs
599    ret
600
601
602
603
604///**
605//*******************************************************************************
606//*
607//*ih264_intra_pred_luma_8x8_mode_vert_r
608//*
609//* @brief
610//* Perform Intra prediction for  luma_8x8 mode:Vertical_Right
611//*
612//* @par Description:
//*   Perform Intra prediction for luma_8x8 mode: Vertical_Right, described in sec 8.3.2.2.7
614//*
615//* @param[in] pu1_src
616//*  UWORD8 pointer to the source
617//*
618//* @param[out] pu1_dst
619//*  UWORD8 pointer to the destination
620//*
621//* @param[in] src_strd
622//*  integer source stride
623//*
624//* @param[in] dst_strd
625//*  integer destination stride
626//*
627//* @param[in] ui_neighboravailability
628//*  availability of neighbouring pixels
629//*
630//* @returns
631//*
632//* @remarks
633//*  None
634//*
635//*******************************************************************************/
636//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
637//                                            UWORD8 *pu1_dst,
638//                                            WORD32 src_strd,
639//                                              WORD32 dst_strd,
640//                                              WORD32 ui_neighboravailability)
641
642//**************Variables Vs Registers*****************************************
643//    x0 => *pu1_src
644//    x1 => *pu1_dst
645//    w2 =>  src_strd
646//    w3 =>  dst_strd
647//    w4 =>  ui_neighboravailability
648
649
650    .global ih264_intra_pred_luma_8x8_mode_vert_r_av8
651
652ih264_intra_pred_luma_8x8_mode_vert_r_av8:
653
654    // STMFD sp!, {x4-x12, x14}          //store register values to stack
655    push_v_regs
656    stp       x19, x20, [sp, #-16]!
657    sxtw      x3, w3
658
659    ld1       { v0.16b}, [x0]
660    mov       v1.d[0], v0.d[1]
661    add       x0, x0, #1
662    ld1       { v2.16b}, [x0]
663    mov       v3.d[0], v2.d[1]
664    ext       v4.16b, v2.16b , v2.16b , #1
665    mov       v5.d[0], v4.d[1]
666    // q1 = q0 shifted to left once
667    // q2 = q1 shifted to left once
668    uaddl     v20.8h, v0.8b, v2.8b
669    uaddl     v22.8h, v1.8b, v3.8b
670    uaddl     v24.8h, v2.8b, v4.8b
671    uaddl     v26.8h, v3.8b, v5.8b
672    add       v24.8h, v20.8h , v24.8h
673    add       v26.8h, v22.8h , v26.8h
674
675    sqrshrun  v4.8b, v20.8h, #1
676    sqrshrun  v5.8b, v22.8h, #1
677    mov       v4.d[1], v5.d[0]
678    sqrshrun  v6.8b, v24.8h, #2
679    sqrshrun  v7.8b, v26.8h, #2
680    mov       v6.d[1], v7.d[0]
681    //Q2 has all FILT11 values
682    //Q3 has all FILT121 values
683    sub       x5, x3, #6
684    sub       x6, x3, #4
685    st1       {v5.8b}, [x1], x3         // row 0
686    ext       v18.16b, v6.16b , v6.16b , #15
687    mov       v22.16b , v18.16b
688    ext       v16.16b, v4.16b , v4.16b , #1
689    st1       {v18.d}[1], [x1], x3      //row 1
690    mov       v14.16b , v16.16b
691    ext       v20.16b, v4.16b , v4.16b , #15
692    uzp1      v17.16b, v16.16b, v18.16b
693    uzp2      v18.16b, v16.16b, v18.16b
694    mov       v16.16b , v17.16b
695    //row 2
696    ext       v12.16b, v16.16b , v16.16b , #1
697    st1       {v20.d}[1], [x1]
698    st1       {v6.b}[6], [x1], x3
699    //row 3
700
701    st1       {v12.h}[5], [x1], #2
702    st1       {v6.s}[2], [x1], #4
703    st1       {v6.h}[6], [x1], x5
704    //row 4
705    st1       {v18.h}[5], [x1], #2
706    st1       {v4.s}[2], [x1], #4
707    st1       {v4.h}[6], [x1], x5
708    //row 5
709    ext       v26.16b, v18.16b , v18.16b , #1
710    st1       {v16.h}[5], [x1], #2
711    st1       {v22.s}[2], [x1], #4
712    st1       {v22.h}[6], [x1], x5
713    //row 6
714    st1       {v26.h}[4], [x1], #2
715    st1       {v26.b}[10], [x1], #1
716    st1       {v4.b}[8], [x1], #1
717    st1       {v14.s}[2], [x1], x6
718    //row 7
719    st1       {v12.s}[2], [x1], #4
720    st1       {v6.s}[2], [x1], #4
721
722end_func_vert_r:
723    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
724    ldp       x19, x20, [sp], #16
725    pop_v_regs
726    ret
727
728
729
730
731///**
732//*******************************************************************************
733//*
734//*ih264_intra_pred_luma_8x8_mode_horz_d
735//*
736//* @brief
737//* Perform Intra prediction for  luma_8x8 mode:Horizontal_Down
738//*
739//* @par Description:
//*   Perform Intra prediction for luma_8x8 mode: Horizontal_Down, described in sec 8.3.2.2.8
741//*
742//* @param[in] pu1_src
743//*  UWORD8 pointer to the source
744//*
745//* @param[out] pu1_dst
746//*  UWORD8 pointer to the destination
747//*
748//* @param[in] src_strd
749//*  integer source stride
750//*
751//* @param[in] dst_strd
752//*  integer destination stride
753//*
754//* @param[in] ui_neighboravailability
755//*  availability of neighbouring pixels
756//*
757//* @returns
758//*
759//* @remarks
760//*  None
761//*
762//*******************************************************************************/
763//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
764//                                            UWORD8 *pu1_dst,
765//                                            WORD32 src_strd,
766//                                              WORD32 dst_strd,
767//                                              WORD32 ui_neighboravailability)
768
769//**************Variables Vs Registers*****************************************
770//    x0 => *pu1_src
771//    x1 => *pu1_dst
772//    w2 =>  src_strd
773//    w3 =>  dst_strd
774//    w4 =>  ui_neighboravailability
775
776    .global ih264_intra_pred_luma_8x8_mode_horz_d_av8
777
778ih264_intra_pred_luma_8x8_mode_horz_d_av8:
779
780    // STMFD sp!, {x4-x12, x14}          //store register values to stack
781    push_v_regs
782    stp       x19, x20, [sp, #-16]!
783    sxtw      x3, w3
784
785    ld1       { v0.16b}, [x0]
786    mov       v1.d[0], v0.d[1]
787    add       x0, x0, #1
788    ld1       { v2.16b}, [x0]
789    mov       v3.d[0], v2.d[1]
790    ext       v4.16b, v2.16b , v2.16b , #1
791    mov       v5.d[0], v4.d[1]
792    // q1 = q0 shifted to left once
793    // q2 = q1 shifted to left once
794    uaddl     v20.8h, v0.8b, v2.8b
795    uaddl     v22.8h, v1.8b, v3.8b
796    uaddl     v24.8h, v2.8b, v4.8b
797    uaddl     v26.8h, v3.8b, v5.8b
798    add       v24.8h, v20.8h , v24.8h
799    add       v26.8h, v22.8h , v26.8h
800
801    sqrshrun  v4.8b, v20.8h, #1
802    sqrshrun  v5.8b, v22.8h, #1
803    mov       v4.d[1], v5.d[0]
804    sqrshrun  v6.8b, v24.8h, #2
805    sqrshrun  v7.8b, v26.8h, #2
806    mov       v6.d[1], v7.d[0]
807    //Q2 has all FILT11 values
808    //Q3 has all FILT121 values
809    mov       v8.16b, v4.16b
810    mov       v10.16b, v6.16b
811    sub       x6, x3, #6
812    trn1      v9.16b, v8.16b, v10.16b
813    trn2      v10.16b, v8.16b, v10.16b  //
814    mov       v8.16b, v9.16b
815    mov       v12.16b, v8.16b
816    mov       v14.16b, v10.16b
817    sub       x5, x3, #4
818    trn1      v13.8h, v12.8h, v14.8h
819    trn2      v14.8h, v12.8h, v14.8h
820    mov       v12.16b, v13.16b
821    ext       v16.16b, v6.16b , v6.16b , #14
822    //ROW 0
823    st1       {v16.d}[1], [x1]
824    st1       {v10.h}[3], [x1], x3
825
826    //ROW 1
827    st1       {v14.s}[1], [x1], #4
828    st1       {v6.s}[2], [x1], x5
829    //ROW 2
830    st1       {v10.h}[2], [x1], #2
831    st1       {v14.s}[1], [x1], #4
832    st1       {v7.h}[0], [x1], x6
833    //ROW 3
834    st1       {v12.s}[1], [x1], #4
835    st1       {v14.s}[1], [x1], x5
836    //ROW 4
837    st1       {v14.h}[1], [x1], #2
838    st1       {v12.s}[1], [x1], #4
839    st1       {v14.h}[2], [x1], x6
840    //ROW 5
841    st1       {v14.s}[0], [x1], #4
842    st1       {v12.s}[1], [x1], x5
843    //ROW 6
844    st1       {v10.h}[0], [x1], #2
845    st1       {v8.h}[1], [x1], #2
846    st1       {v14.h}[1], [x1], #2
847    st1       {v12.h}[2], [x1], x6
848    //ROW 7
849    st1       {v12.s}[0], [x1], #4
850    st1       {v14.s}[0], [x1], x5
851
852end_func_horz_d:
853    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
854    ldp       x19, x20, [sp], #16
855    pop_v_regs
856    ret
857
858
859
860
861
862///**
863//*******************************************************************************
864//*
865//*ih264_intra_pred_luma_8x8_mode_vert_l
866//*
867//* @brief
868//*  Perform Intra prediction for  luma_8x8 mode:Vertical_Left
869//*
870//* @par Description:
//*   Perform Intra prediction for luma_8x8 mode: Vertical_Left, described in sec 8.3.2.2.9
872//*
873//* @param[in] pu1_src
874//*  UWORD8 pointer to the source
875//*
876//* @param[out] pu1_dst
877//*  UWORD8 pointer to the destination
878//*
879//* @param[in] src_strd
880//*  integer source stride
881//*
882//* @param[in] dst_strd
883//*  integer destination stride
884//*
885//* @param[in] ui_neighboravailability
886//*  availability of neighbouring pixels
887//*
888//* @returns
889//*
890//* @remarks
891//*  None
892//*
893//*******************************************************************************/
894//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
895//                                            UWORD8 *pu1_dst,
896//                                            WORD32 src_strd,
897//                                              WORD32 dst_strd,
898//                                              WORD32 ui_neighboravailability)
899
900//**************Variables Vs Registers*****************************************
901//    x0 => *pu1_src
902//    x1 => *pu1_dst
903//    w2 =>  src_strd
904//    w3 =>  dst_strd
905//    w4 =>  ui_neighboravailability
906
907
908    .global ih264_intra_pred_luma_8x8_mode_vert_l_av8
909
910ih264_intra_pred_luma_8x8_mode_vert_l_av8:
911
912    // STMFD sp!, {x4-x12, x14}         //Restoring registers from stack
913    push_v_regs
914    stp       x19, x20, [sp, #-16]!
915    sxtw      x3, w3
916    add       x0, x0, #9
917    ld1       { v0.16b}, [x0]
918    mov       v1.d[0], v0.d[1]
919    add       x0, x0, #1
920    ld1       { v2.16b}, [x0]
921    mov       v3.d[0], v2.d[1]
922    ext       v4.16b, v2.16b , v2.16b , #1
923    mov       v5.d[0], v4.d[1]
924    uaddl     v20.8h, v0.8b, v2.8b
925    uaddl     v22.8h, v1.8b, v3.8b
926    uaddl     v24.8h, v2.8b, v4.8b
927    uaddl     v26.8h, v3.8b, v5.8b
928    add       v24.8h, v20.8h , v24.8h
929    add       v26.8h, v22.8h , v26.8h
930
931    sqrshrun  v4.8b, v20.8h, #1
932    sqrshrun  v5.8b, v22.8h, #1
933    mov       v4.d[1], v5.d[0]
934    sqrshrun  v6.8b, v24.8h, #2
935    ext       v8.16b, v4.16b , v4.16b , #1
936    sqrshrun  v7.8b, v26.8h, #2
937    mov       v6.d[1], v7.d[0]
938    //Q2 has all FILT11 values
939    //Q3 has all FILT121 values
940
941    ext       v10.16b, v6.16b , v6.16b , #1
942    //ROW 0,1
943    st1       {v4.8b}, [x1], x3
944    st1       {v6.8b}, [x1], x3
945
946    ext       v12.16b, v8.16b , v8.16b , #1
947    ext       v14.16b, v10.16b , v10.16b , #1
948    //ROW 2,3
949    st1       {v8.8b}, [x1], x3
950    st1       {v10.8b}, [x1], x3
951
952    ext       v16.16b, v12.16b , v12.16b , #1
953    ext       v18.16b, v14.16b , v14.16b , #1
954    //ROW 4,5
955    st1       {v12.8b}, [x1], x3
956    st1       {v14.8b}, [x1], x3
957    //ROW 6,7
958    st1       {v16.8b}, [x1], x3
959    st1       {v18.8b}, [x1], x3
960
961end_func_vert_l:
962    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
963    ldp       x19, x20, [sp], #16
964    pop_v_regs
965    ret
966
967
968
969
970
971///**
972//*******************************************************************************
973//*
974//*ih264_intra_pred_luma_8x8_mode_horz_u
975//*
976//* @brief
977//*     Perform Intra prediction for  luma_8x8 mode:Horizontal_Up
978//*
979//* @par Description:
//*      Perform Intra prediction for luma_8x8 mode: Horizontal_Up, described in sec 8.3.2.2.10
981//*
982//* @param[in] pu1_src
983//*  UWORD8 pointer to the source
984//*
985//* @param[out] pu1_dst
986//*  UWORD8 pointer to the destination
987//*
988//* @param[in] src_strd
989//*  integer source stride
990//*
991//* @param[in] dst_strd
992//*  integer destination stride
993//*
994//* @param[in] ui_neighboravailability
995//*  availability of neighbouring pixels
996//*
997//* @returns
998//*
999//* @remarks
1000//*  None
1001//*
1002//*******************************************************************************/
1003//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
1004//                                           UWORD8 *pu1_dst,
1005//                                           WORD32 src_strd,
1006//                                             WORD32 dst_strd,
1007//                                             WORD32 ui_neighboravailability)
1008
1009//**************Variables Vs Registers*****************************************
1010//    x0 => *pu1_src
1011//    x1 => *pu1_dst
1012//    w2 =>  src_strd
1013//    w3 =>  dst_strd
1014//    w4 =>  ui_neighboravailability
1015
1016    .global ih264_intra_pred_luma_8x8_mode_horz_u_av8
1017
1018ih264_intra_pred_luma_8x8_mode_horz_u_av8:
1019
1020    // STMFD sp!, {x4-x12, x14}          //store register values to stack
1021    push_v_regs
1022    stp       x19, x20, [sp, #-16]!
1023    sxtw      x3, w3
1024
1025    ld1       {v0.8b}, [x0]
1026    ld1       {v1.b}[7], [x0]
1027    mov       v0.d[1], v1.d[0]
1028    ext       v2.16b, v0.16b , v0.16b , #1
1029    mov       v3.d[0], v2.d[1]
1030    ext       v4.16b, v2.16b , v2.16b , #1
1031    mov       v5.d[0], v4.d[1]
1032
1033    adrp      x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
1034    ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
1035    uaddl     v20.8h, v0.8b, v2.8b
1036    uaddl     v22.8h, v1.8b, v3.8b
1037    uaddl     v24.8h, v2.8b, v4.8b
1038    uaddl     v26.8h, v3.8b, v5.8b
1039    add       v24.8h, v20.8h , v24.8h
1040    add       v26.8h, v22.8h , v26.8h
1041    ld1       { v10.16b}, [x12]
1042    mov       v11.d[0], v10.d[1]
1043    sqrshrun  v4.8b, v20.8h, #1
1044    sqrshrun  v5.8b, v22.8h, #1
1045    mov       v4.d[1], v5.d[0]
1046    sqrshrun  v6.8b, v24.8h, #2
1047    sqrshrun  v7.8b, v26.8h, #2
1048    mov       v6.d[1], v7.d[0]
1049    //Q2 has all FILT11 values
1050    //Q3 has all FILT121 values
1051    mov       v30.16b, v4.16b
1052    mov       v31.16b, v6.16b
1053    tbl       v12.8b, {v30.16b, v31.16b}, v10.8b
1054    dup       v14.16b, v5.b[7]          //
1055    tbl       v13.8b, {v30.16b, v31.16b}, v11.8b
1056    mov       v12.d[1], v13.d[0]
1057    ext       v16.16b, v12.16b , v14.16b , #2
1058    ext       v18.16b, v16.16b , v14.16b , #2
1059    st1       {v12.8b}, [x1], x3        //0
1060    ext       v20.16b, v18.16b , v14.16b , #2
1061    st1       {v16.8b}, [x1], x3        //1
1062    st1       {v18.8b}, [x1], x3        //2
1063    st1       {v20.8b}, [x1], x3        //3
1064    st1       {v13.8b}, [x1], x3        //4
1065    st1       {v16.d}[1], [x1], x3      //5
1066    st1       {v18.d}[1], [x1], x3      //6
1067    st1       {v20.d}[1], [x1], x3      //7
1068
1069
1070end_func_horz_u:
1071    // LDMFD sp!,{x4-x12,PC}         //Restoring registers from stack
1072    ldp       x19, x20, [sp], #16
1073    pop_v_regs
1074    ret
1075
1076
1077