1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19 *******************************************************************************
20 * @file
21 *  ihevc_trans_macros.h
22 *
23 * @brief
24 *  Macros used in the forward transform and inverse transform functions
25 *
26 * @author
27 *  Ittiam
28 *
29 * @remarks
30 *  None
31 *
32 *******************************************************************************
33 */
34 #ifndef IHEVC_TRANS_MACROS_H_
35 #define IHEVC_TRANS_MACROS_H_
36 
/**
 * QUANT - quantize one coefficient with the bit depth fixed to 8.
 *
 * Working on the magnitude of inp, the macro computes
 *     (|inp| * quant_coeff + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits
 * with
 *     q_bits = QUANT_SHIFT + qp_div
 *            + (MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size)
 *            + SCALING_Q_SHIFT - 4
 * then re-applies the sign of inp, clips with CLIP_S16 and stores the value,
 * cast to WORD16, in out.
 *
 * @param out             lvalue that receives the quantized value
 * @param inp             input coefficient; read exactly once into a WORD32
 * @param quant_coeff     multiplier applied to |inp|
 * @param qp_div          term added to the shift amount q_bits
 * @param log2_trans_size term subtracted from the transform shift
 * @param q_add           rounding offset in Q(QUANT_ROUND_FACTOR_Q) fixed point
 *
 * Every argument is parenthesized and evaluated once, so expressions such as
 * "a + b" or "*p++" are safe. The rounding term needs
 * q_bits >= QUANT_ROUND_FACTOR_Q (a negative shift count is undefined).
 *
 * The macro expands to a brace-enclosed compound statement that declares the
 * locals tmp, sign, inp_val, bit_depth, transform_shift, q_bits and
 * quant_multiplier; arguments must not refer to variables with those names.
 */
#define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add)                              \
{                                                                                                 \
    LWORD64 tmp;                                                                                  \
    WORD32 sign, inp_val;                                                                         \
    WORD32 bit_depth, transform_shift;                                                            \
    WORD32 q_bits, quant_multiplier;                                                              \
                                                                                                  \
    /* q_bits calculation */                                                                      \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    bit_depth = 8;                                                                                \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size);                       \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */        \
    quant_multiplier = 4;                                                                         \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier;       \
                                                                                                  \
    /* Read the input once so that arguments with side effects are expanded only once */          \
    inp_val = (inp);                                                                              \
    sign = (inp_val < 0) ? -1 : 1;                                                                \
                                                                                                  \
    /* Scale the magnitude in 64 bits, add the rounding offset, then shift down */                \
    tmp = (LWORD64)(abs(inp_val));                                                                \
    tmp = tmp * (quant_coeff);                                                                    \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q));                          \
    tmp = tmp >> q_bits;                                                                          \
                                                                                                  \
    tmp = tmp * sign;                                                                             \
    out = (WORD16) CLIP_S16(tmp);                                                                 \
}
61 
/**
 * QUANT_HBD - quantize one coefficient for a caller-supplied bit depth.
 *
 * Same arithmetic as the 8-bit QUANT macro, except that bit_depth is a
 * parameter instead of the constant 8:
 *     q_bits = QUANT_SHIFT + qp_div
 *            + (MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size)
 *            + SCALING_Q_SHIFT - 4
 *     out    = sign(inp) *
 *              ((|inp| * quant_coeff
 *                + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 * clipped with CLIP_S16 and cast to WORD16.
 *
 * @param out             lvalue that receives the quantized value
 * @param inp             input coefficient; read exactly once into a WORD32
 * @param quant_coeff     multiplier applied to |inp|
 * @param qp_div          term added to the shift amount q_bits
 * @param log2_trans_size term subtracted from the transform shift
 * @param q_add           rounding offset in Q(QUANT_ROUND_FACTOR_Q) fixed point
 * @param bit_depth       term subtracted from MAX_TR_DYNAMIC_RANGE
 *
 * Every argument is parenthesized and evaluated once. The rounding term needs
 * q_bits >= QUANT_ROUND_FACTOR_Q (a negative shift count is undefined).
 *
 * The macro expands to a brace-enclosed compound statement that declares the
 * locals tmp, sign, inp_val, transform_shift, q_bits and quant_multiplier;
 * arguments must not refer to variables with those names.
 */
#define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth)               \
{                                                                                                 \
    LWORD64 tmp;                                                                                  \
    WORD32 sign, inp_val;                                                                         \
    WORD32 transform_shift;                                                                       \
    WORD32 q_bits, quant_multiplier;                                                              \
                                                                                                  \
    /* q_bits calculation */                                                                      \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size);                     \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */        \
    quant_multiplier = 4;                                                                         \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT - quant_multiplier;       \
                                                                                                  \
    /* Read the input once so that arguments with side effects are expanded only once */          \
    inp_val = (inp);                                                                              \
    sign = (inp_val < 0) ? -1 : 1;                                                                \
                                                                                                  \
    /* Scale the magnitude in 64 bits, add the rounding offset, then shift down */                \
    tmp = (LWORD64)(abs(inp_val));                                                                \
    tmp = tmp * (quant_coeff);                                                                    \
    tmp = tmp + (((LWORD64)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q));                          \
    tmp = tmp >> q_bits;                                                                          \
                                                                                                  \
    tmp = tmp * sign;                                                                             \
    out = (WORD16) CLIP_S16(tmp);                                                                 \
}
86 /* added by 100028 */
/**
 * QUANT_NO_WEIGHTMAT - quantize one coefficient with the bit depth fixed to 8
 * and the shift additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT.
 *
 * Working on the magnitude of inp, the macro computes
 *     (|inp| * quant_coeff + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits
 * with
 *     q_bits = QUANT_SHIFT + qp_div
 *            + (MAX_TR_DYNAMIC_RANGE - 8 - log2_trans_size)
 *            + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
 * then re-applies the sign of inp, clips with CLIP_S16 and stores the value,
 * cast to WORD16, in out.
 *
 * All intermediate arithmetic is carried out in WORD32, so
 * |inp| * quant_coeff plus the rounding term must fit in 32 bits.
 *
 * @param out             lvalue that receives the quantized value
 * @param inp             input coefficient; read exactly once into a WORD32
 * @param quant_coeff     multiplier applied to |inp|
 * @param qp_div          term added to the shift amount q_bits
 * @param log2_trans_size term subtracted from the transform shift
 * @param q_add           rounding offset in Q(QUANT_ROUND_FACTOR_Q) fixed point
 *
 * Every argument is parenthesized and evaluated once. The rounding term needs
 * q_bits >= QUANT_ROUND_FACTOR_Q (a negative shift count is undefined).
 *
 * The macro expands to a brace-enclosed compound statement that declares the
 * locals tmp, sign, inp_val, bit_depth, transform_shift, q_bits and
 * quant_multiplier; arguments must not refer to variables with those names.
 */
#define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add)                 \
{                                                                                                 \
    WORD32 tmp;                                                                                   \
    WORD32 sign, inp_val;                                                                         \
    WORD32 bit_depth, transform_shift;                                                            \
    WORD32 q_bits, quant_multiplier;                                                              \
                                                                                                  \
    /* q_bits calculation */                                                                      \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    bit_depth = 8;                                                                                \
    transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - (log2_trans_size);                       \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */        \
    quant_multiplier = 4;                                                                         \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT                           \
             - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */;                            \
                                                                                                  \
    /* Read the input once so that arguments with side effects are expanded only once */          \
    inp_val = (inp);                                                                              \
    sign = (inp_val < 0) ? -1 : 1;                                                                \
                                                                                                  \
    /* Scale the magnitude, add the rounding offset, then shift down (all in WORD32) */           \
    tmp = (WORD32)(abs(inp_val));                                                                 \
    tmp = tmp * (quant_coeff);                                                                    \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q));                           \
    tmp = tmp >> q_bits;                                                                          \
                                                                                                  \
    tmp = tmp * sign;                                                                             \
    out = (WORD16) CLIP_S16(tmp);                                                                 \
}
111 
/**
 * QUANT_NO_WEIGHTMAT_HBD - quantize one coefficient for a caller-supplied bit
 * depth, with the shift additionally reduced by FLAT_RESCALE_MAT_Q_SHIFT.
 *
 *     q_bits = QUANT_SHIFT + qp_div
 *            + (MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size)
 *            + SCALING_Q_SHIFT - 4 - FLAT_RESCALE_MAT_Q_SHIFT
 *     out    = sign(inp) *
 *              ((|inp| * quant_coeff
 *                + (q_add << (q_bits - QUANT_ROUND_FACTOR_Q))) >> q_bits)
 * clipped with CLIP_S16 and cast to WORD16.
 *
 * All intermediate arithmetic is carried out in WORD32, so
 * |inp| * quant_coeff plus the rounding term must fit in 32 bits.
 *
 * @param out             lvalue that receives the quantized value
 * @param inp             input coefficient; read exactly once into a WORD32
 * @param quant_coeff     multiplier applied to |inp|
 * @param qp_div          term added to the shift amount q_bits
 * @param log2_trans_size term subtracted from the transform shift
 * @param q_add           rounding offset in Q(QUANT_ROUND_FACTOR_Q) fixed point
 * @param bit_depth       term subtracted from MAX_TR_DYNAMIC_RANGE
 *
 * Every argument is parenthesized and evaluated once. The rounding term needs
 * q_bits >= QUANT_ROUND_FACTOR_Q (a negative shift count is undefined).
 *
 * The macro expands to a brace-enclosed compound statement that declares the
 * locals tmp, sign, inp_val, transform_shift, q_bits and quant_multiplier;
 * arguments must not refer to variables with those names.
 */
#define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth)  \
{                                                                                                 \
    WORD32 tmp;                                                                                   \
    WORD32 sign, inp_val;                                                                         \
    WORD32 transform_shift;                                                                       \
    WORD32 q_bits, quant_multiplier;                                                              \
                                                                                                  \
    /* q_bits calculation */                                                                      \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    transform_shift = MAX_TR_DYNAMIC_RANGE - (bit_depth) - (log2_trans_size);                     \
    /* quant_coeff are multiplied by 16; instead of multiplying, q_bits is reduced by 4 */        \
    quant_multiplier = 4;                                                                         \
    q_bits = QUANT_SHIFT + (qp_div) + transform_shift + SCALING_Q_SHIFT                           \
             - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */;                            \
                                                                                                  \
    /* Read the input once so that arguments with side effects are expanded only once */          \
    inp_val = (inp);                                                                              \
    sign = (inp_val < 0) ? -1 : 1;                                                                \
                                                                                                  \
    /* Scale the magnitude, add the rounding offset, then shift down (all in WORD32) */           \
    tmp = (WORD32)(abs(inp_val));                                                                 \
    tmp = tmp * (quant_coeff);                                                                    \
    tmp = tmp + (((WORD32)(q_add)) << (q_bits - QUANT_ROUND_FACTOR_Q));                           \
    tmp = tmp >> q_bits;                                                                          \
                                                                                                  \
    tmp = tmp * sign;                                                                             \
    out = (WORD16) CLIP_S16(tmp);                                                                 \
}
136 /* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned  */
137 
138 /* Inverse quantization other than 4x4 */
139 /* No clipping is needed for "pi2_src"(coefficients) */
/**
 * IQUANT - inverse-quantize one coefficient (block sizes other than 4x4).
 *
 * With shift = shift_iq - qp_div the macro computes
 *     tmp = coeff * dequant_coeff + SHL_NEG(1, shift - 1)
 *     res = CLIP_S16(SHR_NEG(tmp, shift))
 * in WORD32 arithmetic. The input coefficient is not clipped beforehand.
 *
 * SHL_NEG / SHR_NEG are defined elsewhere; they are presumably shifts that
 * reverse direction for a negative count - confirm against their definition.
 *
 * @param res           lvalue that receives the clipped result
 * @param coeff         quantized coefficient
 *                      (original note: pi2_src[index*src_strd])
 * @param dequant_coeff de-quantization multiplier
 *                      (original note: pi2_dequant_coeff[index*trans_size]
 *                      * g_ihevc_iquant_scales[qp_rem])
 * @param shift_iq      base shift amount
 * @param qp_div        amount subtracted from shift_iq
 *
 * Every argument is parenthesized and evaluated once. The macro expands to a
 * brace-enclosed compound statement that declares the locals tmp, add_iq and
 * iq_shift; arguments must not refer to variables with those names.
 */
#define IQUANT(res, coeff, dequant_coeff, shift_iq, qp_div)                                       \
{                                                                                                 \
    WORD32 tmp, add_iq;                                                                           \
    WORD32 iq_shift;                                                                              \
                                                                                                  \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    iq_shift = (shift_iq) - (qp_div);                                                             \
    add_iq = SHL_NEG(1, (iq_shift - 1));                                                          \
                                                                                                  \
    tmp = (coeff) * (dequant_coeff);                                                              \
    tmp = tmp + add_iq;                                                                           \
    tmp = SHR_NEG(tmp, (iq_shift));                                                               \
                                                                                                  \
    res = CLIP_S16(tmp);                                                                          \
}
152 
153 /* 4x4 inverse quantization */
154 /* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)", or to 16 bits otherwise (this is the option IQUANT_4x4 implements) */
156 /* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
157 
/**
 * IQUANT_4x4 - inverse-quantize one coefficient of a 4x4 block.
 *
 * The coefficient is first clipped to a signed range of
 *     10 bits when qp_div >  shift_iq
 *     16 bits when qp_div <= shift_iq
 * and then, with shift = shift_iq - qp_div,
 *     tmp = clipped_coeff * dequant_coeff + SHL_NEG(1, shift - 1)
 *     res = CLIP_S16(SHR_NEG(tmp, shift))
 * in WORD32 arithmetic.
 *
 * SHL_NEG / SHR_NEG / CLIP3 are defined elsewhere; SHL_NEG and SHR_NEG are
 * presumably shifts that reverse direction for a negative count - confirm
 * against their definition.
 *
 * @param res           lvalue that receives the clipped result
 * @param coeff         quantized coefficient; read exactly once into a WORD32
 *                      (original note: pi2_src[index*src_strd])
 * @param dequant_coeff de-quantization multiplier
 *                      (original note: pi2_dequant_coeff[index*trans_size]
 *                      * g_ihevc_iquant_scales[qp_rem])
 * @param shift_iq      base shift amount
 * @param qp_div        amount subtracted from shift_iq
 *
 * Every argument is parenthesized and evaluated once. The macro expands to a
 * brace-enclosed compound statement that declares the locals clip_coeff, tmp,
 * coeff_val, coeff_min, coeff_max, coeff_bit_range, add_iq and iq_shift;
 * arguments must not refer to variables with those names.
 */
#define IQUANT_4x4(res, coeff, dequant_coeff, shift_iq, qp_div)                                   \
{                                                                                                 \
    WORD32 clip_coeff, tmp;                                                                       \
    WORD32 coeff_val;                                                                             \
    WORD32 coeff_min, coeff_max;                                                                  \
    WORD32 coeff_bit_range;                                                                       \
    WORD32 add_iq, iq_shift;                                                                      \
                                                                                                  \
    /* To be moved outside in neon. To be computed once per transform call */                     \
    iq_shift = (shift_iq) - (qp_div);                                                             \
    add_iq = SHL_NEG(1, (iq_shift - 1));                                                          \
                                                                                                  \
    /* A negative shift (qp_div > shift_iq) scales the product up, so the */                      \
    /* coefficient is limited to 10 bits in that case and 16 bits otherwise */                    \
    coeff_bit_range = 16;                                                                         \
    if(iq_shift < 0)                                                                              \
    {                                                                                             \
        coeff_bit_range = 10;                                                                     \
    }                                                                                             \
                                                                                                  \
    coeff_min = -(1 << (coeff_bit_range - 1));                                                    \
    coeff_max = (1 << (coeff_bit_range - 1)) - 1;                                                 \
                                                                                                  \
    /* Read the input once; CLIP3 may expand its first argument several times */                  \
    coeff_val = (coeff);                                                                          \
    clip_coeff = CLIP3(coeff_val, coeff_min, coeff_max);                                          \
                                                                                                  \
    tmp = clip_coeff * (dequant_coeff);                                                           \
    tmp = tmp + add_iq;                                                                           \
    tmp = SHR_NEG(tmp, (iq_shift));                                                               \
                                                                                                  \
    res = CLIP_S16(tmp);                                                                          \
}
181 
182 #endif /* IHEVC_TRANS_MACROS_H_ */
183