1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vp8_rtcd.h"
12 
/****************************************************************************
 * Notes:
 *
 * This implementation makes use of 16 bit fixed point version of two multiply
 * constants:
 *         1.   sqrt(2) * cos (pi/8)
 *         2.   sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
 * fixed point precision as the second one, we use a trick of
 *         x * a = x + x*(a-1)
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
 **************************************************************************/
/* (sqrt(2) * cos(pi/8) - 1) scaled by 2^16 and rounded (0.30656... * 65536). */
static const int cospi8sqrt2minus1 = 20091;
/* sqrt(2) * sin(pi/8) scaled by 2^16 and rounded (0.54119... * 65536). */
static const int sinpi8sqrt2 = 35468;

/*
 * 4x4 inverse transform followed by reconstruction.
 *
 * Reads 16 coefficients from `input` (left unmodified), runs two 1-D passes
 * over them, adds the rounded result to the 4x4 predictor at `pred_ptr`
 * (rows `pred_stride` bytes apart) and writes the sums, clamped to [0, 255],
 * to the 4x4 area at `dst_ptr` (rows `dst_stride` bytes apart).
 *
 * Intermediate values are stored as `short` between the passes, so they are
 * narrowed to `short` at each store.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  short residual[16];
  int n, row, col;

  /* Pass 1: combine the four elements that lie 4 apart, for each offset n. */
  for (n = 0; n < 4; ++n) {
    const short *src = input + n;
    short *out = residual + n;
    const int even_sum = src[0] + src[8];
    const int even_diff = src[0] - src[8];
    /* Odd part: the products are Q16, hence the shift right by 16. */
    const int odd_lo = ((src[4] * sinpi8sqrt2) >> 16) -
                       (src[12] + ((src[12] * cospi8sqrt2minus1) >> 16));
    const int odd_hi = (src[4] + ((src[4] * cospi8sqrt2minus1) >> 16)) +
                       ((src[12] * sinpi8sqrt2) >> 16);

    out[0] = (short)(even_sum + odd_hi);
    out[4] = (short)(even_diff + odd_lo);
    out[8] = (short)(even_diff - odd_lo);
    out[12] = (short)(even_sum - odd_hi);
  }

  /* Pass 2: same butterfly on each group of 4 consecutive values, in place,
   * with a rounding shift of (x + 4) >> 3. All four inputs of a group are
   * read before any of them is overwritten. */
  for (row = 0; row < 4; ++row) {
    short *line = residual + 4 * row;
    const int even_sum = line[0] + line[2];
    const int even_diff = line[0] - line[2];
    const int odd_lo = ((line[1] * sinpi8sqrt2) >> 16) -
                       (line[3] + ((line[3] * cospi8sqrt2minus1) >> 16));
    const int odd_hi = (line[1] + ((line[1] * cospi8sqrt2minus1) >> 16)) +
                       ((line[3] * sinpi8sqrt2) >> 16);

    line[0] = (short)((even_sum + odd_hi + 4) >> 3);
    line[1] = (short)((even_diff + odd_lo + 4) >> 3);
    line[2] = (short)((even_diff - odd_lo + 4) >> 3);
    line[3] = (short)((even_sum - odd_hi + 4) >> 3);
  }

  /* Reconstruction: predictor + residual, saturated to the 8-bit range. */
  for (row = 0; row < 4; ++row) {
    const short *line = residual + 4 * row;

    for (col = 0; col < 4; ++col) {
      const int sum = line[col] + pred_ptr[col];

      dst_ptr[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
    }
    pred_ptr += pred_stride;
    dst_ptr += dst_stride;
  }
}
104 
/*
 * Reconstruction for a block whose only coefficient is `input_dc`.
 *
 * The value (input_dc + 4) >> 3 is added to every pixel of the 4x4 predictor
 * at `pred_ptr` (rows `pred_stride` bytes apart); each sum is clamped to
 * [0, 255] and stored in the 4x4 area at `dst_ptr` (rows `dst_stride` bytes
 * apart).
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
                            int pred_stride, unsigned char *dst_ptr,
                            int dst_stride) {
  const int dc_offset = (input_dc + 4) >> 3;
  int row = 0;

  while (row < 4) {
    int col;

    for (col = 0; col < 4; ++col) {
      const int sum = dc_offset + pred_ptr[col];

      /* Saturate to the 8-bit pixel range. */
      dst_ptr[col] = (unsigned char)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
    }

    pred_ptr += pred_stride;
    dst_ptr += dst_stride;
    ++row;
  }
}
126 
/*
 * 4x4 inverse Walsh transform built from add/subtract butterflies only.
 *
 * Reads 16 values from `input`, transforms them in two passes, rounds each
 * result with (x + 3) >> 3 and stores result i at mb_dqcoeff[i * 16], so
 * `mb_dqcoeff` must hold at least 241 shorts. Elements between those slots
 * are not written.
 * NOTE(review): the 16-element spacing presumably targets the first
 * coefficient of 16 consecutive blocks - confirm against the caller.
 */
void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) {
  short tmp[16];
  int k;

  /* Pass 1: butterfly over the four elements 4 apart, for each offset k.
   * Every read of `input` happens here, before mb_dqcoeff is written. */
  for (k = 0; k < 4; ++k) {
    const int outer_sum = input[k] + input[k + 12];
    const int inner_sum = input[k + 4] + input[k + 8];
    const int inner_diff = input[k + 4] - input[k + 8];
    const int outer_diff = input[k] - input[k + 12];

    tmp[k] = (short)(outer_sum + inner_sum);
    tmp[k + 4] = (short)(inner_diff + outer_diff);
    tmp[k + 8] = (short)(outer_sum - inner_sum);
    tmp[k + 12] = (short)(outer_diff - inner_diff);
  }

  /* Pass 2: butterfly over each group of 4 consecutive values, then round
   * and scatter straight to the strided destination. */
  for (k = 0; k < 4; ++k) {
    const short *line = tmp + 4 * k;
    short *out = mb_dqcoeff + (4 * k) * 16;
    const int outer_sum = line[0] + line[3];
    const int inner_sum = line[1] + line[2];
    const int inner_diff = line[1] - line[2];
    const int outer_diff = line[0] - line[3];

    out[0 * 16] = (short)((outer_sum + inner_sum + 3) >> 3);
    out[1 * 16] = (short)((inner_diff + outer_diff + 3) >> 3);
    out[2 * 16] = (short)((outer_sum - inner_sum + 3) >> 3);
    out[3 * 16] = (short)((outer_diff - inner_diff + 3) >> 3);
  }
}
176 
/*
 * Shortcut inverse Walsh transform that uses only input[0].
 *
 * Computes (input[0] + 3) >> 3 once and stores it at mb_dqcoeff[i * 16] for
 * i = 0..15, so `mb_dqcoeff` must hold at least 241 shorts. Elements between
 * those slots are not written.
 */
void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) {
  const int dc_value = (input[0] + 3) >> 3;
  int offset;

  /* Step through the 16 destination slots, 16 shorts apart. */
  for (offset = 0; offset < 16 * 16; offset += 16) {
    mb_dqcoeff[offset] = dc_value;
  }
}
186