1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/desktop_capture/differ_block_sse2.h"
12 
13 #if defined(_MSC_VER)
14 #include <intrin.h>
15 #else
16 #include <mmintrin.h>
17 #include <emmintrin.h>
18 #endif
19 
20 #include "webrtc/modules/desktop_capture/differ_block.h"
21 
22 namespace webrtc {
23 
BlockDifference_SSE2_W16(const uint8_t * image1,const uint8_t * image2,int stride)24 extern bool BlockDifference_SSE2_W16(const uint8_t* image1,
25                                      const uint8_t* image2,
26                                      int stride) {
27   __m128i acc = _mm_setzero_si128();
28   __m128i v0;
29   __m128i v1;
30   __m128i sad;
31   for (int y = 0; y < kBlockSize; ++y) {
32     const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
33     const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
34     v0 = _mm_loadu_si128(i1);
35     v1 = _mm_loadu_si128(i2);
36     sad = _mm_sad_epu8(v0, v1);
37     acc = _mm_adds_epu16(acc, sad);
38     v0 = _mm_loadu_si128(i1 + 1);
39     v1 = _mm_loadu_si128(i2 + 1);
40     sad = _mm_sad_epu8(v0, v1);
41     acc = _mm_adds_epu16(acc, sad);
42     v0 = _mm_loadu_si128(i1 + 2);
43     v1 = _mm_loadu_si128(i2 + 2);
44     sad = _mm_sad_epu8(v0, v1);
45     acc = _mm_adds_epu16(acc, sad);
46     v0 = _mm_loadu_si128(i1 + 3);
47     v1 = _mm_loadu_si128(i2 + 3);
48     sad = _mm_sad_epu8(v0, v1);
49     acc = _mm_adds_epu16(acc, sad);
50 
51     // This essential means sad = acc >> 64. We only care about the lower 16
52     // bits.
53     sad = _mm_shuffle_epi32(acc, 0xEE);
54     sad = _mm_adds_epu16(sad, acc);
55     int diff = _mm_cvtsi128_si32(sad);
56     if (diff)
57       return true;
58     image1 += stride;
59     image2 += stride;
60   }
61   return false;
62 }
63 
BlockDifference_SSE2_W32(const uint8_t * image1,const uint8_t * image2,int stride)64 extern bool BlockDifference_SSE2_W32(const uint8_t* image1,
65                                      const uint8_t* image2,
66                                      int stride) {
67   __m128i acc = _mm_setzero_si128();
68   __m128i v0;
69   __m128i v1;
70   __m128i sad;
71   for (int y = 0; y < kBlockSize; ++y) {
72     const __m128i* i1 = reinterpret_cast<const __m128i*>(image1);
73     const __m128i* i2 = reinterpret_cast<const __m128i*>(image2);
74     v0 = _mm_loadu_si128(i1);
75     v1 = _mm_loadu_si128(i2);
76     sad = _mm_sad_epu8(v0, v1);
77     acc = _mm_adds_epu16(acc, sad);
78     v0 = _mm_loadu_si128(i1 + 1);
79     v1 = _mm_loadu_si128(i2 + 1);
80     sad = _mm_sad_epu8(v0, v1);
81     acc = _mm_adds_epu16(acc, sad);
82     v0 = _mm_loadu_si128(i1 + 2);
83     v1 = _mm_loadu_si128(i2 + 2);
84     sad = _mm_sad_epu8(v0, v1);
85     acc = _mm_adds_epu16(acc, sad);
86     v0 = _mm_loadu_si128(i1 + 3);
87     v1 = _mm_loadu_si128(i2 + 3);
88     sad = _mm_sad_epu8(v0, v1);
89     acc = _mm_adds_epu16(acc, sad);
90     v0 = _mm_loadu_si128(i1 + 4);
91     v1 = _mm_loadu_si128(i2 + 4);
92     sad = _mm_sad_epu8(v0, v1);
93     acc = _mm_adds_epu16(acc, sad);
94     v0 = _mm_loadu_si128(i1 + 5);
95     v1 = _mm_loadu_si128(i2 + 5);
96     sad = _mm_sad_epu8(v0, v1);
97     acc = _mm_adds_epu16(acc, sad);
98     v0 = _mm_loadu_si128(i1 + 6);
99     v1 = _mm_loadu_si128(i2 + 6);
100     sad = _mm_sad_epu8(v0, v1);
101     acc = _mm_adds_epu16(acc, sad);
102     v0 = _mm_loadu_si128(i1 + 7);
103     v1 = _mm_loadu_si128(i2 + 7);
104     sad = _mm_sad_epu8(v0, v1);
105     acc = _mm_adds_epu16(acc, sad);
106 
107     // This essential means sad = acc >> 64. We only care about the lower 16
108     // bits.
109     sad = _mm_shuffle_epi32(acc, 0xEE);
110     sad = _mm_adds_epu16(sad, acc);
111     int diff = _mm_cvtsi128_si32(sad);
112     if (diff)
113       return true;
114     image1 += stride;
115     image2 += stride;
116   }
117   return false;
118 }
119 
120 }  // namespace webrtc
121