1 /*
2 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include <emmintrin.h>
9 #include "SkUtils_opts_SSE2.h"
10
// Fills 'count' consecutive 16-bit elements starting at 'dst' with 'value'.
//
// Runs of 32 or more elements are written with aligned 128-bit SSE2 stores,
// four stores (64 bytes, 32 elements) per iteration. Shorter runs, the
// unaligned head and the leftover tail are stored one element at a time.
void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count)
{
    SkASSERT(dst != NULL && count >= 0);

    // dst must be 2-byte aligned.
    SkASSERT((((size_t) dst) & 0x01) == 0);

    if (count >= 32) {
        // Head: scalar stores until dst sits on a 16-byte boundary, which is
        // what _mm_store_si128 requires.
        for (; (((size_t) dst) & 0x0F) != 0; --count) {
            *dst++ = value;
        }

        // Body: 'value' replicated into all eight 16-bit lanes.
        const __m128i pattern = _mm_set1_epi16(value);
        __m128i *wide = reinterpret_cast<__m128i*>(dst);
        for (; count >= 32; count -= 32, wide += 4) {
            _mm_store_si128(&wide[0], pattern);
            _mm_store_si128(&wide[1], pattern);
            _mm_store_si128(&wide[2], pattern);
            _mm_store_si128(&wide[3], pattern);
        }
        dst = reinterpret_cast<uint16_t*>(wide);
    }

    // Tail (or the whole buffer when count started below 32).
    for (; count > 0; --count) {
        *dst++ = value;
    }
}
40
// Fills 'count' consecutive 32-bit elements starting at 'dst' with 'value'.
//
// Runs of 16 or more elements are written with aligned 128-bit SSE2 stores,
// four stores (64 bytes, 16 elements) per iteration. Shorter runs, the
// unaligned head and the leftover tail are stored one element at a time.
void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
{
    SkASSERT(dst != NULL && count >= 0);

    // dst must be 4-byte aligned.
    SkASSERT((((size_t) dst) & 0x03) == 0);

    if (count >= 16) {
        // Head: scalar stores until dst sits on a 16-byte boundary, which is
        // what _mm_store_si128 requires.
        for (; (((size_t) dst) & 0x0F) != 0; --count) {
            *dst++ = value;
        }

        // Body: 'value' replicated into all four 32-bit lanes.
        const __m128i pattern = _mm_set1_epi32(value);
        __m128i *wide = reinterpret_cast<__m128i*>(dst);
        for (; count >= 16; count -= 16, wide += 4) {
            _mm_store_si128(&wide[0], pattern);
            _mm_store_si128(&wide[1], pattern);
            _mm_store_si128(&wide[2], pattern);
            _mm_store_si128(&wide[3], pattern);
        }
        dst = reinterpret_cast<uint32_t*>(wide);
    }

    // Tail (or the whole buffer when count started below 16).
    for (; count > 0; --count) {
        *dst++ = value;
    }
}
70
// Copies 'count' 32-bit elements from 'src' to 'dst'.
//
// Runs of 16 or more elements are copied 64 bytes at a time: four 128-bit
// loads followed by four 128-bit stores. Only 'dst' is brought to a 16-byte
// boundary; 'src' is read with unaligned loads, so it needs no particular
// 16-byte alignment. Shorter runs, the unaligned head and the leftover tail
// are copied one element at a time.
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
{
    SkASSERT(count >= 0);

    // dst must be 4-byte aligned: the head loop below advances dst in 4-byte
    // steps and could never reach a 16-byte boundary otherwise.
    SkASSERT((((size_t) dst) & 0x03) == 0);

    if (count >= 16) {
        // Head: scalar copies until dst is 16-byte aligned, as required by
        // _mm_store_si128.
        while (((size_t)dst) & 0x0F) {
            *dst++ = *src++;
            --count;
        }
        __m128i *dst128 = reinterpret_cast<__m128i*>(dst);
        const __m128i *src128 = reinterpret_cast<const __m128i*>(src);
        while (count >= 16) {
            // All four loads are issued before any of the stores.
            __m128i a = _mm_loadu_si128(src128++);
            __m128i b = _mm_loadu_si128(src128++);
            __m128i c = _mm_loadu_si128(src128++);
            __m128i d = _mm_loadu_si128(src128++);

            _mm_store_si128(dst128++, a);
            _mm_store_si128(dst128++, b);
            _mm_store_si128(dst128++, c);
            _mm_store_si128(dst128++, d);
            count -= 16;
        }
        dst = reinterpret_cast<uint32_t*>(dst128);
        src = reinterpret_cast<const uint32_t*>(src128);
    }

    // Tail (or the whole buffer when count started below 16).
    while (count > 0) {
        *dst++ = *src++;
        --count;
    }
}
100