1 /* 2 * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #ifndef VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 12 #define VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 13 14 #include "vpx_dsp/mips/txfm_macros_msa.h" 15 #include "vpx_dsp/txfm_common.h" 16 17 #define LD_HADD(psrc, stride) ({ \ 18 v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \ 19 v4i32 vec_w_m; \ 20 \ 21 LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m); \ 22 ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m); \ 23 LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m); \ 24 ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, \ 25 in4_m, in6_m, in0_m, in4_m); \ 26 in0_m += in4_m; \ 27 \ 28 vec_w_m = __msa_hadd_s_w(in0_m, in0_m); \ 29 HADD_SW_S32(vec_w_m); \ 30 }) 31 32 #define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \ 33 v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \ 34 v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \ 35 v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \ 36 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ 37 cospi_24_64, -cospi_8_64, 0, 0, 0 }; \ 38 \ 39 BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m); \ 40 ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m); \ 41 SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m); \ 42 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 43 vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m); \ 44 \ 45 SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m); \ 46 cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m); \ 47 vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ 48 \ 49 vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m); \ 50 cnst2_m = __msa_splati_h(coeff_m, 2); \ 51 cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m); \ 52 vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ 53 \ 54 SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS); \ 55 PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, \ 56 vec7_m, vec7_m, out0, out2, out1, out3); \ 57 } 58 59 #define SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7) { \ 60 v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ 61 \ 62 SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15); \ 63 SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15); \ 64 AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, \ 65 in0, in1, in2, in3); \ 66 AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, \ 67 in4, in5, in6, in7); \ 68 } 69 70 #define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \ 71 out0, out1, out2, out3, out4, out5, out6, out7) { \ 72 v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \ 73 v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \ 74 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ 75 cospi_24_64, cospi_4_64, cospi_28_64, \ 76 cospi_12_64, cospi_20_64 }; \ 77 \ 78 /* FDCT stage1 */ \ 79 BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \ 80 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \ 81 BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ 82 ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ 83 ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ 84 SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ 85 x1_m = __msa_ilvev_h(x1_m, x0_m); \ 86 out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 87 \ 88 SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ 89 x2_m = -x2_m; \ 90 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 91 out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 92 \ 93 out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 94 x2_m = __msa_splati_h(coeff_m, 2); \ 95 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 96 out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 97 \ 98 /* stage2 */ \ 99 ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ 100 \ 101 s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 102 s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 103 \ 104 /* stage3 */ \ 105 BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ 106 \ 107 /* stage4 */ \ 108 ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ 109 ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ 110 \ 111 SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ 112 x1_m = __msa_ilvev_h(x0_m, x1_m); \ 113 out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ 114 \ 115 SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ 116 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 117 out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 118 \ 119 x1_m = __msa_splati_h(coeff_m, 5); \ 120 x0_m = -x0_m; \ 121 x0_m = __msa_ilvev_h(x1_m, x0_m); \ 122 out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ 123 \ 124 x2_m = __msa_splati_h(coeff_m, 6); \ 125 x3_m = -x3_m; \ 126 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 127 out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 128 } 129 130 #define FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, \ 131 out0, out1, out2, out3, out4, out5, out6, out7) { \ 132 v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ 133 v8i16 x0_m, x1_m, x2_m, x3_m; \ 134 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ 135 cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ 136 \ 137 /* FDCT stage1 */ \ 138 BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \ 139 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \ 140 BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ 141 ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ 142 ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ 143 SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ 144 x1_m = __msa_ilvev_h(x1_m, x0_m); \ 145 out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 146 \ 147 SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ 148 x2_m = -x2_m; \ 149 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 150 out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 151 \ 152 out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 153 x2_m = __msa_splati_h(coeff_m, 2); \ 154 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 155 out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ 156 \ 157 /* stage2 */ \ 158 ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ 159 \ 160 s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ 161 s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ 162 \ 163 /* stage3 */ \ 164 BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ 165 \ 166 /* stage4 */ \ 167 ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ 168 ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ 169 \ 170 SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ 171 x1_m = __msa_ilvev_h(x0_m, x1_m); \ 172 out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ 173 \ 174 SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ 175 x2_m = __msa_ilvev_h(x3_m, x2_m); \ 176 out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 177 \ 178 x1_m = __msa_splati_h(coeff_m, 5); \ 179 x0_m = -x0_m; \ 180 x0_m = __msa_ilvev_h(x1_m, x0_m); \ 181 out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ 182 \ 183 x2_m = __msa_splati_h(coeff_m, 6); \ 184 x3_m = -x3_m; \ 185 x2_m = __msa_ilvev_h(x2_m, x3_m); \ 186 out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ 187 } 188 189 #define FDCT8x16_ODD(input0, input1, input2, input3, \ 190 input4, input5, input6, input7, \ 191 out1, out3, out5, out7, \ 192 out9, out11, out13, out15) { \ 193 v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \ 194 v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \ 195 v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \ 196 v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \ 197 v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \ 198 v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \ 199 cospi_24_64, -cospi_8_64, -cospi_24_64, \ 200 cospi_12_64, cospi_20_64 }; \ 201 v8i16 coeff1_m = { cospi_2_64, cospi_30_64, cospi_14_64, \ 202 cospi_18_64, cospi_10_64, cospi_22_64, \ 203 cospi_6_64, cospi_26_64 }; \ 204 v8i16 coeff2_m = { -cospi_2_64, -cospi_10_64, -cospi_18_64, \ 205 -cospi_26_64, 0, 0, 0, 0 }; \ 206 \ 207 /* stp 1 */ \ 208 ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \ 209 ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \ 210 \ 211 cnst4_m = __msa_splati_h(coeff_m, 0); \ 212 stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \ 213 \ 214 cnst5_m = __msa_splati_h(coeff_m, 1); \ 215 cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m); \ 216 stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \ 217 stp24_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \ 218 stp23_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \ 219 \ 220 /* stp2 */ \ 221 BUTTERFLY_4(input0, input1, stp22_m, stp23_m, \ 222 stp30_m, stp31_m, stp32_m, stp33_m); \ 223 BUTTERFLY_4(input7, input6, stp25_m, stp24_m, \ 224 stp37_m, stp36_m, stp35_m, stp34_m); \ 225 \ 226 ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \ 227 ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \ 228 \ 229 SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \ 230 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 231 stp26_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ 232 \ 233 cnst0_m = __msa_splati_h(coeff_m, 4); \ 234 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 235 stp21_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ 236 \ 237 SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \ 238 cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 239 stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ 240 \ 241 cnst0_m = __msa_splati_h(coeff_m, 3); \ 242 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 243 stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ 244 \ 245 /* stp4 */ \ 246 BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, \ 247 vec6_m, vec2_m, vec4_m, vec5_m); \ 248 BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, \ 249 stp21_m, stp23_m, stp24_m, stp31_m); \ 250 \ 251 ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \ 252 SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \ 253 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 254 \ 255 out1 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 256 \ 257 cnst0_m = __msa_splati_h(coeff2_m, 0); \ 258 cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 259 out15 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 260 \ 261 ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \ 262 SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ 263 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 264 \ 265 out9 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ 266 \ 267 cnst1_m = __msa_splati_h(coeff2_m, 2); \ 268 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 269 out7 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 270 \ 271 ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \ 272 SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \ 273 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 274 out5 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 275 \ 276 cnst0_m = __msa_splati_h(coeff2_m, 1); \ 277 cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 278 out11 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 279 \ 280 ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \ 281 SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \ 282 cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ 283 \ 284 out13 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ 285 \ 286 cnst1_m = __msa_splati_h(coeff2_m, 3); \ 287 cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ 288 out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ 289 } 290 291 #define FDCT_POSTPROC_2V_NEG_H(vec0, vec1) { \ 292 v8i16 tp0_m, tp1_m; \ 293 v8i16 one_m = __msa_ldi_h(1); \ 294 \ 295 tp0_m = __msa_clti_s_h(vec0, 0); \ 296 tp1_m = __msa_clti_s_h(vec1, 0); \ 297 vec0 += 1; \ 298 vec1 += 1; \ 299 tp0_m = one_m & tp0_m; \ 300 tp1_m = one_m & tp1_m; \ 301 vec0 += tp0_m; \ 302 vec1 += tp1_m; \ 303 vec0 >>= 2; \ 304 vec1 >>= 2; \ 305 } 306 307 #define FDCT32_POSTPROC_NEG_W(vec) { \ 308 v4i32 temp_m; \ 309 v4i32 one_m = __msa_ldi_w(1); \ 310 \ 311 temp_m = __msa_clti_s_w(vec, 0); \ 312 vec += 1; \ 313 temp_m = one_m & temp_m; \ 314 vec += temp_m; \ 315 vec >>= 2; \ 316 } 317 318 #define FDCT32_POSTPROC_2V_POS_H(vec0, vec1) { \ 319 v8i16 tp0_m, tp1_m; \ 320 v8i16 one = __msa_ldi_h(1); \ 321 \ 322 tp0_m = __msa_clei_s_h(vec0, 0); \ 323 tp1_m = __msa_clei_s_h(vec1, 0); \ 324 tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \ 325 tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \ 326 vec0 += 1; \ 327 vec1 += 1; \ 328 tp0_m = one & tp0_m; \ 329 tp1_m = one & tp1_m; \ 330 vec0 += tp0_m; \ 331 vec1 += tp1_m; \ 332 vec0 >>= 2; \ 333 vec1 >>= 2; \ 334 } 335 336 #define DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, \ 337 reg1_right, const0, const1, \ 338 out0, out1, out2, out3) { \ 339 v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ 340 v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \ 341 v4i32 k0_m = __msa_fill_w((int32_t) const0); \ 342 \ 343 s0_m = __msa_fill_w((int32_t) const1); \ 344 k0_m = __msa_ilvev_w(s0_m, k0_m); \ 345 \ 346 ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m); \ 347 ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m); \ 348 ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m); \ 349 ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m); \ 350 \ 351 DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \ 352 DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \ 353 tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ 354 tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ 355 tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ 356 tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ 357 out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ 358 out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ 359 \ 360 DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \ 361 DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \ 362 tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ 363 tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ 364 tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ 365 tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ 366 out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ 367 out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ 368 } 369 370 void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, 371 int32_t src_stride); 372 void fdct16x8_1d_row(int16_t *input, int16_t *output); 373 #endif // VPX_DSP_MIPS_FWD_TXFM_MSA_H_ 374