1 /*
2  * Copyright 2009 The Android Open Source Project
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include <emmintrin.h>
9 #include "SkUtils_opts_SSE2.h"
10 
sk_memset16_SSE2(uint16_t * dst,uint16_t value,int count)11 void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count)
12 {
13     SkASSERT(dst != NULL && count >= 0);
14 
15     // dst must be 2-byte aligned.
16     SkASSERT((((size_t) dst) & 0x01) == 0);
17 
18     if (count >= 32) {
19         while (((size_t)dst) & 0x0F) {
20             *dst++ = value;
21             --count;
22         }
23         __m128i *d = reinterpret_cast<__m128i*>(dst);
24         __m128i value_wide = _mm_set1_epi16(value);
25         while (count >= 32) {
26             _mm_store_si128(d    , value_wide);
27             _mm_store_si128(d + 1, value_wide);
28             _mm_store_si128(d + 2, value_wide);
29             _mm_store_si128(d + 3, value_wide);
30             d += 4;
31             count -= 32;
32         }
33         dst = reinterpret_cast<uint16_t*>(d);
34     }
35     while (count > 0) {
36         *dst++ = value;
37         --count;
38     }
39 }
40 
sk_memset32_SSE2(uint32_t * dst,uint32_t value,int count)41 void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
42 {
43     SkASSERT(dst != NULL && count >= 0);
44 
45     // dst must be 4-byte aligned.
46     SkASSERT((((size_t) dst) & 0x03) == 0);
47 
48     if (count >= 16) {
49         while (((size_t)dst) & 0x0F) {
50             *dst++ = value;
51             --count;
52         }
53         __m128i *d = reinterpret_cast<__m128i*>(dst);
54         __m128i value_wide = _mm_set1_epi32(value);
55         while (count >= 16) {
56             _mm_store_si128(d    , value_wide);
57             _mm_store_si128(d + 1, value_wide);
58             _mm_store_si128(d + 2, value_wide);
59             _mm_store_si128(d + 3, value_wide);
60             d += 4;
61             count -= 16;
62         }
63         dst = reinterpret_cast<uint32_t*>(d);
64     }
65     while (count > 0) {
66         *dst++ = value;
67         --count;
68     }
69 }
70 
// Copies `count` 32-bit elements from `src` to `dst`.
//
// Runs of 16 or more elements are moved 64 bytes at a time: `dst` is first
// brought to a 16-byte boundary so aligned stores can be used, while `src`
// is always read with unaligned loads and therefore needs no particular
// 16-byte alignment. A non-positive `count` copies nothing.
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
{
    // Four 128-bit registers hold 64 bytes, i.e. 16 uint32_t elements.
    const int kElemsPerBlock = 16;

    if (count >= kElemsPerBlock) {
        // Scalar head: copy single elements until dst sits on a 16-byte
        // boundary. NOTE(review): this loop steps by 4 bytes, so it relies on
        // dst being 4-byte aligned to ever reach that boundary.
        for (; (((size_t)dst) & 0x0F) != 0; --count) {
            *dst++ = *src++;
        }

        const __m128i *in = reinterpret_cast<const __m128i*>(src);
        __m128i *out = reinterpret_cast<__m128i*>(dst);

        // Vector body: read a full 64-byte block first, then write it out.
        for (; count >= kElemsPerBlock; count -= kElemsPerBlock, in += 4, out += 4) {
            const __m128i q0 = _mm_loadu_si128(in + 0);
            const __m128i q1 = _mm_loadu_si128(in + 1);
            const __m128i q2 = _mm_loadu_si128(in + 2);
            const __m128i q3 = _mm_loadu_si128(in + 3);

            _mm_store_si128(out + 0, q0);
            _mm_store_si128(out + 1, q1);
            _mm_store_si128(out + 2, q2);
            _mm_store_si128(out + 3, q3);
        }

        // Resume scalar copying right after the last vector block.
        src = reinterpret_cast<const uint32_t*>(in);
        dst = reinterpret_cast<uint32_t*>(out);
    }

    // Scalar tail (or the whole job when count < 16).
    for (; count > 0; --count) {
        *dst++ = *src++;
    }
}
100