@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_itrans_recon_dc_chroma.s
@*
@* @brief
@*  Contains the function definition for inverse transform and reconstruction
@*  of chroma blocks in the DC-only case
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  - ihevcd_itrans_recon_dc_chroma_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

.text

@ Byte offsets (from sp, after the prologue below) of the two arguments that
@ are passed on the stack.  The prologue stores 13 core registers
@ (push {r0-r11,lr} = 52 bytes) and 8 doubleword registers
@ (vpush {d8-d15} = 64 bytes), so the first stack argument sits at sp + 116.
.equ ITRANS_DC_CHROMA_ARG_LOG2_TRANS_SIZE, 0x74
.equ ITRANS_DC_CHROMA_ARG_COEFF_VALUE,     0x78

.globl ihevcd_itrans_recon_dc_chroma_a9q

.type ihevcd_itrans_recon_dc_chroma_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevcd_itrans_recon_dc_chroma(uword8 *pu1_pred,
@                                    uword8 *pu1_dst,
@                                    word32 pred_strd,
@                                    word32 dst_strd,
@                                    word32 log2_trans_size,
@                                    word16 i2_coeff_value)
@
@ Derives one DC value from i2_coeff_value (two rounding/saturating stages)
@ and adds it, with unsigned 8-bit saturation, to every even-indexed byte of a
@ trans_size x trans_size block of 2-byte pairs read from pu1_pred; the result
@ is written to pu1_dst.  The odd-indexed byte of each pair is written to
@ pu1_dst exactly as it was read from pu1_pred.
@ (presumably the pairs are interleaved chroma samples - confirm with caller)
@
@ In:    r0 = pu1_pred
@        r1 = pu1_dst
@        r2 = pred_strd   (bytes between rows of pu1_pred)
@        r3 = dst_strd    (bytes between rows of pu1_dst)
@        stack: log2_trans_size, i2_coeff_value (read as full 32-bit words)
@ Out:   nothing returned; r0-r11 and d8-d15 are restored before returning
@ Uses:  r4-r11, q0-q4 (d0-d9), d10-d17, q9-q15, flags
@ Note:  trans_size == 4 takes a dedicated path; every other size is processed
@        in 8-row x 8-pair tiles, so it is assumed to be a multiple of 8.
@-------------------------------------------------------------------------------
ihevcd_itrans_recon_dc_chroma_a9q:

    push        {r0-r11,lr}
    vpush       {d8-d15}
    ldr         r4,[sp,#ITRANS_DC_CHROMA_ARG_LOG2_TRANS_SIZE]
    ldr         r5,[sp,#ITRANS_DC_CHROMA_ARG_COEFF_VALUE]
    mov         r10,#1
    lsl         r4,r10,r4                   @ trans_size = (1 << log2_trans_size)
    mov         r6,#64                      @ 1 << (shift1 - 1), shift1 = 7
    mov         r7,#2048                    @ 1 << (shift2 - 1), shift2 = 12

    @ stage 1: dc = sat16((coeff * 64 + 64) >> 7)
    add         r8,r6,r5,lsl #6
    ssat        r8,#16,r8,asr #7
    @ stage 2: dc = sat16((dc * 64 + 2048) >> 12)
    add         r5,r7,r8,lsl #6
    ssat        r6,#16,r5,asr #12
    mov         r8,r4                       @ r8 = rows still to process

    @ r6 has the dc_value
    @ r4 has the trans_size value
    @ r8 has the row counter
    vdup.s16    q0,r6                       @ q0 = dc_value in all eight 16-bit lanes
    cmp         r4,#4
    beq         row_loop_4chroma


@ ---- trans_size != 4 : 8 rows x 8 pairs (16 bytes per row) per iteration ----
row_loop_chroma:
    mov         r9,r4                       @ r9 = pairs left in this band of rows


col_loop_chroma:

    @ vld2.8 de-interleaves: even bytes -> first register, odd bytes -> second
    mov         r7,r0
    vld2.8      {d2,d3},[r7],r2
    vld2.8      {d4,d5},[r7],r2
    vld2.8      {d6,d7},[r7],r2
    vld2.8      {d8,d9},[r7],r2

    vld2.8      {d10,d11},[r7],r2
    vld2.8      {d12,d13},[r7],r2
    vld2.8      {d14,d15},[r7],r2
    vld2.8      {d16,d17},[r7]

    add         r0,r0,#16                   @ next tile: 8 pairs = 16 bytes to the right


    @ widen the even bytes to 16 bit and add the dc value
    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d4
    vaddw.u8    q13,q0,d6
    vaddw.u8    q12,q0,d8
    vaddw.u8    q11,q0,d10
    vaddw.u8    q10,q0,d12
    vaddw.u8    q9,q0,d14


    @ narrow back to bytes with unsigned saturation (clip to 0..255)
    mov         r11,r1
    vqmovun.s16 d2,q15
    vqmovun.s16 d4,q14
    vqmovun.s16 d6,q13
    vqmovun.s16 d8,q12

    @ q15 is free again only now, so row 7 is summed after row 0 is narrowed
    vaddw.u8    q15,q0,d16

    vqmovun.s16 d10,q11
    vqmovun.s16 d12,q10
    vqmovun.s16 d14,q9
    vqmovun.s16 d16,q15

    @ vst2.8 re-interleaves the updated even bytes with the untouched odd bytes
    vst2.8      {d2,d3},[r11],r3
    vst2.8      {d4,d5},[r11],r3
    vst2.8      {d6,d7},[r11],r3
    vst2.8      {d8,d9},[r11],r3

    vst2.8      {d10,d11},[r11],r3
    vst2.8      {d12,d13},[r11],r3
    vst2.8      {d14,d15},[r11],r3
    vst2.8      {d16,d17},[r11]

    add         r1,r1,#16

    subs        r9,r9,#8
    bgt         col_loop_chroma

    subs        r8,r8,#8                    @ sets the flags tested by the bgt below

    @ the add/sub below do not update the flags
    add         r0,r0,r2,lsl #3             @ down 8 rows ...
    add         r1,r1,r3,lsl #3
    sub         r0,r0,r4,lsl #1             @ ... and back to the left edge (2 * trans_size bytes)
    sub         r1,r1,r4,lsl #1
    bgt         row_loop_chroma
    b           end_loops_chroma


@ ---- trans_size == 4 : one 4-row x 4-pair block (8 bytes per row) ----
row_loop_4chroma:

    @ Load exactly the 8 bytes that belong to each row, so nothing beyond the
    @ row is read from pu1_pred.
    vld1.8      {d2},[r0],r2
    vld1.8      {d4},[r0],r2
    vld1.8      {d6},[r0],r2
    vld1.8      {d8},[r0]

    @ De-interleave: lanes 0-3 of the first register receive the even bytes,
    @ lanes 0-3 of the second register the odd bytes.  Lanes 4-7 of both hold
    @ whatever the second register contained before; they are never stored.
    vuzp.8      d2,d3
    vuzp.8      d4,d5
    vuzp.8      d6,d7
    vuzp.8      d8,d9

    @ widen the even bytes to 16 bit and add the dc value
    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d4
    vaddw.u8    q13,q0,d6
    vaddw.u8    q12,q0,d8

    @ narrow back to bytes with unsigned saturation (clip to 0..255)
    vqmovun.s16 d2,q15
    vqmovun.s16 d4,q14
    vqmovun.s16 d6,q13
    vqmovun.s16 d8,q12

    @ re-interleave: the first register ends up holding the four updated pairs
    vzip.8      d2,d3
    vzip.8      d4,d5
    vzip.8      d6,d7
    vzip.8      d8,d9

    @ store 8 bytes (4 pairs) per row
    vst1.u32    {d2},[r1],r3
    vst1.u32    {d4},[r1],r3
    vst1.u32    {d6},[r1],r3
    vst1.u32    {d8},[r1]

end_loops_chroma:
    vpop        {d8-d15}
    pop         {r0-r11,pc}                 @ restores the saved registers and returns

.size ihevcd_itrans_recon_dc_chroma_a9q, .-ihevcd_itrans_recon_dc_chroma_a9q