1 /*
2  *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef VPX_DSP_X86_TRANSPOSE_SSE2_H_
12 #define VPX_DSP_X86_TRANSPOSE_SSE2_H_
13 
14 #include "./vpx_dsp_rtcd.h"
15 #include "vpx_dsp/x86/inv_txfm_sse2.h"
16 #include "vpx_dsp/x86/txfm_common_sse2.h"
17 
transpose_16bit_4x4(__m128i * res)18 static INLINE void transpose_16bit_4x4(__m128i *res) {
19   const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]);
20   const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]);
21 
22   res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1);
23   res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1);
24 }
25 
transpose_32bit_4x4(__m128i * const a0,__m128i * const a1,__m128i * const a2,__m128i * const a3)26 static INLINE void transpose_32bit_4x4(__m128i *const a0, __m128i *const a1,
27                                        __m128i *const a2, __m128i *const a3) {
28   // Unpack 32 bit elements. Goes from:
29   // a0: 00 01 02 03
30   // a1: 10 11 12 13
31   // a2: 20 21 22 23
32   // a3: 30 31 32 33
33   // to:
34   // b0: 00 10 01 11
35   // b1: 20 30 21 31
36   // b2: 02 12 03 13
37   // b3: 22 32 23 33
38 
39   const __m128i b0 = _mm_unpacklo_epi32(*a0, *a1);
40   const __m128i b1 = _mm_unpacklo_epi32(*a2, *a3);
41   const __m128i b2 = _mm_unpackhi_epi32(*a0, *a1);
42   const __m128i b3 = _mm_unpackhi_epi32(*a2, *a3);
43 
44   // Unpack 64 bit elements resulting in:
45   // a0: 00 10 20 30
46   // a1: 01 11 21 31
47   // a2: 02 12 22 32
48   // a3: 03 13 23 33
49   *a0 = _mm_unpacklo_epi64(b0, b1);
50   *a1 = _mm_unpackhi_epi64(b0, b1);
51   *a2 = _mm_unpacklo_epi64(b2, b3);
52   *a3 = _mm_unpackhi_epi64(b2, b3);
53 }
54 
55 #endif  // VPX_DSP_X86_TRANSPOSE_SSE2_H_
56