1@/*****************************************************************************
2@*
3@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4@*
5@* Licensed under the Apache License, Version 2.0 (the "License");
6@* you may not use this file except in compliance with the License.
7@* You may obtain a copy of the License at:
8@*
9@* http://www.apache.org/licenses/LICENSE-2.0
10@*
11@* Unless required by applicable law or agreed to in writing, software
12@* distributed under the License is distributed on an "AS IS" BASIS,
13@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@* See the License for the specific language governing permissions and
15@* limitations under the License.
16@*
17@*****************************************************************************/
18@/**
19@/*******************************************************************************
20@* @file
21@*  ihevcd_itrans_recon_dc_luma.s
22@*
23@* @brief
24@*  contains function definitions itrans and recon for dc only case
25@*
26@* @author
27@*  ittiam
28@*
29@* @par list of functions:
30@*
31@*
32@* @remarks
33@*  none
34@*
35@*******************************************************************************/
36
.text

.globl ihevcd_itrans_recon_dc_luma_a9q

.type ihevcd_itrans_recon_dc_luma_a9q, %function

@/**
@*******************************************************************************
@*
@* @brief
@*  dc-only inverse transform and reconstruction of a square luma block
@*  (ARM / NEON, GNU as syntax).
@*
@* c prototype
@*  void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred,
@*                                   uword8 *pu1_dst,
@*                                   word32 pred_strd,
@*                                   word32 dst_strd,
@*                                   word32 log2_trans_size,
@*                                   word16 i2_coeff_value)
@*
@* arguments
@*  r0          : pu1_pred
@*  r1          : pu1_dst
@*  r2          : pred_strd
@*  r3          : dst_strd
@*  [sp,#0]     : log2_trans_size     (offsets as seen at function entry)
@*  [sp,#4]     : i2_coeff_value
@*
@* operation
@*  trans_size = 1 << log2_trans_size
@*  dc         = sat16((i2_coeff_value * 64 + 64) >> 7)
@*  dc         = sat16((dc * 64 + 2048) >> 12)
@*  dst[y][x]  = sat_u8(pred[y][x] + dc)      for 0 <= x,y < trans_size
@*
@*  trans_size == 4 is handled by a dedicated 4x4 path; every other size is
@*  walked in 8x8 tiles.
@*  NOTE(review): this assumes trans_size is 4 or a multiple of 8 (presumably
@*  4/8/16/32) - confirm against the callers.
@*
@* register usage
@*  r0-r11 are saved on entry and restored on exit, d8-d9 likewise.
@*  d0-d7, q8-q15 and the condition flags are modified.
@*
@*******************************************************************************
@*/

ihevcd_itrans_recon_dc_luma_a9q:

    push        {r0-r11,lr}                 @ 13 words = 0x34 bytes, stack args are now at sp+0x34
    ldr         r4,[sp,#0x34]               @ r4 = log2_trans_size
    ldr         r5,[sp,#0x38]               @ r5 = i2_coeff_value
    vpush       {d8-d9}                     @ d8-d15 are callee-saved; the 8x8 path uses d8/d9.
                                            @ done after the ldr's so their offsets stay valid

    mov         r10,#1
    lsl         r4,r10,r4                   @ r4 = trans_size = (1 << log2_trans_size)
    mov         r6,#64                      @ stage 1 rounding term : 1 << (7 - 1)
    mov         r7,#2048                    @ stage 2 rounding term : 1 << (12 - 1)

    add         r8,r6,r5,lsl #6             @ i2_coeff_value * 64 + 64
    ssat        r8,#16,r8,asr #7            @ stage 1 result, saturated to 16 bit
    add         r5,r7,r8,lsl #6             @ stage1 * 64 + 2048
    ssat        r6,#16,r5,asr #12           @ r6 = dc value, saturated to 16 bit
    mov         r8,r4                       @ r8 = rows still to be processed

    @ r6 has the dc_value
    @ r4 has the trans_size value
    @ r8 has the row count
    vdup.16     q0,r6                       @ q0 = dc value in all eight 16 bit lanes
    cmp         r4,#4
    beq         row_loop_4


    @ ------------------------------------------------------------------
    @ trans_size != 4 : the block is processed in 8x8 tiles.
    @ r9 = columns still to be processed in the current band of 8 rows.
    @ ------------------------------------------------------------------
row_loop:
    mov         r9,r4


col_loop:

    @ load 8 rows x 8 pixels of prediction
    mov         r7,r0                       @ r7 walks down the rows, r0 stays on the tile's top row
    vld1.8      d2,[r7],r2
    vld1.8      d3,[r7],r2
    vld1.8      d4,[r7],r2
    vld1.8      d5,[r7],r2

    vld1.8      d6,[r7],r2
    vld1.8      d7,[r7],r2
    vld1.8      d8,[r7],r2
    vld1.8      d9,[r7]

    add         r0,r0,#8                    @ pred : next tile to the right

    @ widen the prediction to 16 bit and add the dc value
    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d3
    vaddw.u8    q13,q0,d4
    vaddw.u8    q12,q0,d5
    vaddw.u8    q11,q0,d6
    vaddw.u8    q10,q0,d7
    vaddw.u8    q9,q0,d8
    vaddw.u8    q8,q0,d9

    @ narrow back to 8 bit with unsigned saturation
    mov         r11,r1                      @ r11 walks down the rows, r1 stays on the tile's top row
    vqmovun.s16 d2,q15
    vqmovun.s16 d3,q14
    vqmovun.s16 d4,q13
    vqmovun.s16 d5,q12
    vqmovun.s16 d6,q11
    vqmovun.s16 d7,q10
    vqmovun.s16 d8,q9
    vqmovun.s16 d9,q8

    @ store 8 rows x 8 pixels of reconstruction
    vst1.32     {d2},[r11],r3
    vst1.32     {d3},[r11],r3
    vst1.32     {d4},[r11],r3
    vst1.32     {d5},[r11],r3
    vst1.32     {d6},[r11],r3
    vst1.32     {d7},[r11],r3
    vst1.32     {d8},[r11],r3
    vst1.32     {d9},[r11]

    add         r1,r1,#8                    @ dst : next tile to the right

    subs        r9,r9,#8
    bgt         col_loop

    @ the column loop advanced both pointers by trans_size; step 8 rows
    @ down and return to the first column
    add         r0,r0,r2,lsl #3
    add         r1,r1,r3,lsl #3
    sub         r0,r0,r4
    sub         r1,r1,r4

    subs        r8,r8,#8
    bgt         row_loop
    b           end_loops


    @ ------------------------------------------------------------------
    @ trans_size == 4 : a single 4x4 block.
    @ only 4 bytes per row are loaded, so nothing beyond the block's own
    @ 4 pixels is read. the upper halves of d2-d5 (and the matching lanes
    @ of q12-q15) hold don't-care values and are never stored.
    @ ------------------------------------------------------------------
row_loop_4:

    vld1.32     {d2[0]},[r0],r2
    vld1.32     {d3[0]},[r0],r2
    vld1.32     {d4[0]},[r0],r2
    vld1.32     {d5[0]},[r0]

    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d3
    vaddw.u8    q13,q0,d4
    vaddw.u8    q12,q0,d5

    vqmovun.s16 d2,q15
    vqmovun.s16 d3,q14
    vqmovun.s16 d4,q13
    vqmovun.s16 d5,q12

    vst1.32     {d2[0]},[r1],r3
    vst1.32     {d3[0]},[r1],r3
    vst1.32     {d4[0]},[r1],r3
    vst1.32     {d5[0]},[r1]

end_loops:
    vpop        {d8-d9}                     @ restore the callee-saved neon registers
    pop         {r0-r11,pc}
186
187
188
189
190
191
192
193
194