@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@*******************************************************************************
@* @file
@*  ihevc_intra_pred_luma_mode_18_34_neon.s
@*
@* @brief
@*  contains the function definition for chroma intra prediction in angular
@*  modes 18 and 34. the function is hand-written arm neon assembly and uses
@*  gnu assembler syntax (.text, .globl, .type directives and '@' comments).
@*
@* @author
@*  yogeswaran rs
@*
@* @par list of functions:
@*  - ihevc_intra_pred_chroma_mode_18_34_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
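@/**
@*******************************************************************************
@*
@* @brief
@*    chroma intra prediction for angular modes 18 and 34
@*
@* @par description:
@*    every destination row is a plain copy of 2 * nt bytes taken from the
@*    reference array. row 0 is read from pu1_ref + 4 * nt + 4 for mode 34
@*    and from pu1_ref + 4 * nt otherwise; each following row is read from
@*    an address 2 bytes higher (mode 34) or 2 bytes lower (otherwise) than
@*    the previous row.
@*
@* @param[in] pu1_ref
@*  uword8 pointer to the source
@*
@* @param[out] pu1_dst
@*  uword8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride, in bytes
@*
@* @param[in] nt
@*  size of transform block; the code handles nt = 4, 8 and 16
@*
@* @param[in] mode
@*  prediction mode; 34 (0x22) selects mode 34, any other value is handled
@*  as mode 18
@*
@* @returns
@*  none
@*
@* @remarks
@*  pi1_coeff (word8 pointer to the planar coefficients) was listed here as
@*  an input, but it is not a parameter of this function and is never read.
@*
@*******************************************************************************
@*/
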
@void ihevc_intra_pred_chroma_mode_18_34(uword8 *pu1_ref,
@                                      word32 src_strd,
@                                      uword8 *pu1_dst,
@                                      word32 dst_strd,
@                                      word32 nt,
@                                      word32 mode)
@
@ exported from this file as ihevc_intra_pred_chroma_mode_18_34_a9q
@
@**************variables vs registers*****************************************
@r0 => *pu1_ref
@r1 => src_strd
@r2 => *pu1_dst
@r3 => dst_strd

@stack contents from #40 (offsets are valid after r4-r12 and r14 are pushed)
@   [sp,#40] => nt
@   [sp,#44] => mode

.text
.align 4

@ Stack offsets of the 5th and 6th arguments, valid once the prologue has
@ pushed r4-r12 and r14 (10 registers * 4 bytes = 40 bytes).
.equ    nt_offset,      40
.equ    mode_offset,    44

.globl ihevc_intra_pred_chroma_mode_18_34_a9q

.type ihevc_intra_pred_chroma_mode_18_34_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevc_intra_pred_chroma_mode_18_34_a9q(uword8 *pu1_ref, word32 src_strd,
@                                             uword8 *pu1_dst, word32 dst_strd,
@                                             word32 nt, word32 mode)
@
@ In:     r0 = pu1_ref, r1 = src_strd (never read), r2 = pu1_dst,
@         r3 = dst_strd, [sp] = nt, [sp,#4] = mode (offsets at entry)
@ Out:    nt rows of 2 * nt bytes written to pu1_dst, rows dst_strd apart
@ Clobb:  r0, r2, flags, d0-d7, d16-d23. r4-r12 are saved and restored.
@         d8-d15 are callee-saved under the AAPCS and are deliberately not
@         used, so nothing has to be spilled for them.
@
@ Row 0 is copied from pu1_ref + 4 * nt + 4 when mode == 34 (0x22) and from
@ pu1_ref + 4 * nt for any other mode value. Each following row is read
@ 2 bytes further on (mode 34) or 2 bytes further back (other modes).
@ Only nt = 4, 8 and 16 are handled: the row counter is decremented by 8 and
@ tested for exactly zero, and a second 16-byte column is copied only when
@ nt == 16.
@-------------------------------------------------------------------------------
ihevc_intra_pred_chroma_mode_18_34_a9q:

    stmfd       sp!, {r4-r12, r14}          @save callee-saved core registers and lr

    ldr         r4,[sp,#nt_offset]          @r4 = nt
    ldr         r5,[sp,#mode_offset]        @r5 = mode

    cmp         r4,#4
    beq         mode2_4                     @nt == 4 uses the 8-bytes-per-row path

    mov         r12,r4                      @r12 = rows left in the current column pass
    mov         r11,r4                      @r11 = nt, tested after a pass to detect nt == 16
    add         r0,r0,r4,lsl #2             @r0 = pu1_ref + 4 * nt

    cmp         r5,#0x22                    @mode == 34 ? (flags stay live down to movne)
    mov         r10,r2                      @r10 = destination write pointer

    add         r0,r0,#4                    @mode 34 start address: pu1_ref + 4 * nt + 4

    subne       r0,r0,#4                    @any other mode starts at pu1_ref + 4 * nt
    moveq       r6,#2                       @r6 = source step per row: +2 for mode 34
    movne       r6,#-2                      @                          -2 otherwise
    mov         r8,r0                       @r8 = source read pointer


    @ Copy eight rows of 16 bytes. After each access the source pointer moves
    @ by r6 and the destination pointer by dst_strd (r3). Every row uses its
    @ own register pair so that loads do not wait on earlier stores.
kernel:

    vld1.8      {d0,d1},[r8],r6
    vst1.8      {d0,d1},[r10],r3
    vld1.8      {d2,d3},[r8],r6
    vst1.8      {d2,d3},[r10],r3
    vld1.8      {d4,d5},[r8],r6
    vst1.8      {d4,d5},[r10],r3
    vld1.8      {d6,d7},[r8],r6
    vst1.8      {d6,d7},[r10],r3
    vld1.8      {d16,d17},[r8],r6           @d16-d23 are caller-saved scratch
    vst1.8      {d16,d17},[r10],r3
    vld1.8      {d18,d19},[r8],r6
    vst1.8      {d18,d19},[r10],r3
    vld1.8      {d20,d21},[r8],r6
    vst1.8      {d20,d21},[r10],r3
    vld1.8      {d22,d23},[r8],r6
    vst1.8      {d22,d23},[r10],r3

    subs        r12,r12,#8                  @eight rows done
    bne         kernel

    @ For nt == 16 each row is 32 bytes wide, so run one more pass over the
    @ second 16-byte column. The flags set by this cmp are consumed by the
    @ beq below; none of the instructions in between updates them.
    cmp         r11,#16
    add         r8,r0,#16                   @source: 16 bytes past the first-pass start
    add         r10,r2,#16                  @destination: 16 bytes into row 0
    sub         r11,r11,#16                 @r11 != 16 from now on, so the pass runs once
    mov         r12,#16                     @all 16 rows again
    beq         kernel
    b           end_func

    @ nt == 4: four rows of 8 bytes each, addressed directly through r0 and r2.
mode2_4:

    add         r0,r0,#20                   @mode 34 start address: pu1_ref + 4 * 4 + 4
    cmp         r5,#0x22
    subne       r0,r0,#4                    @any other mode starts at pu1_ref + 4 * 4

    moveq       r8,#2                       @r8 = source step per row: +2 for mode 34
    movne       r8,#-2                      @                          -2 otherwise

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

    vld1.8      {d0},[r0],r8
    vst1.32     {d0},[r2],r3

end_func:
    ldmfd       sp!,{r4-r12,r15}            @restore registers and return (pc <- saved lr)