1 /*
2  *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_
12 #define VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_
13 
14 #include <emmintrin.h>
15 
16 #include "./vpx_config.h"
17 #include "vpx/vpx_integer.h"
18 #include "vpx_dsp/x86/quantize_sse2.h"
19 
calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,const __m128i dequant,const __m128i zero,tran_low_t * dqcoeff)20 static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff,
21                                                      const __m128i dequant,
22                                                      const __m128i zero,
23                                                      tran_low_t *dqcoeff) {
24   // Un-sign to bias rounding like C.
25   const __m128i coeff = _mm_abs_epi16(qcoeff);
26 
27   const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff);
28   const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff);
29 
30   const __m128i low = _mm_mullo_epi16(coeff, dequant);
31   const __m128i high = _mm_mulhi_epi16(coeff, dequant);
32   __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high);
33   __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high);
34 
35   // "Divide" by 2.
36   dqcoeff32_0 = _mm_srli_epi32(dqcoeff32_0, 1);
37   dqcoeff32_1 = _mm_srli_epi32(dqcoeff32_1, 1);
38 
39   dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0);
40   dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1);
41 
42 #if CONFIG_VP9_HIGHBITDEPTH
43   _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0);
44   _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1);
45 #else
46   _mm_store_si128((__m128i *)(dqcoeff),
47                   _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1));
48 #endif  // CONFIG_VP9_HIGHBITDEPTH
49 }
50 
51 #endif  // VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_
52