1 /*
2 * Copyright 2009 The Android Open Source Project
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkBitmapFilter_opts_SSE2.h"
9 #include "SkBitmapProcState_opts_SSE2.h"
10 #include "SkBitmapProcState_opts_SSSE3.h"
11 #include "SkBitmapScaler.h"
12 #include "SkBlitMask.h"
13 #include "SkBlitRow.h"
14 #include "SkBlitRow_opts_SSE2.h"
15 #include "SkBlitRow_opts_SSE4.h"
16 #include "SkOncePtr.h"
17 #include "SkRTConf.h"
18
19 #if defined(_MSC_VER) && defined(_WIN64)
20 #include <intrin.h>
21 #endif
22
23 /* This file must *not* be compiled with -msse or any other optional SIMD
24 extension, otherwise gcc may generate SIMD instructions even for scalar ops
25 (and thus give an invalid instruction on Pentium3 on the code below).
26 For example, only files named *_SSE2.cpp in this directory should be
27 compiled with -msse2 or higher. */
28
29
/* Execute the CPUID instruction for leaf `info_type` and store the resulting
 * EAX, EBX, ECX and EDX registers in info[0], info[1], info[2] and info[3].
 * One implementation is provided per compiler / architecture.
 *
 * ECX (the CPUID sub-leaf selector) is cleared before the instruction in the
 * hand-written variants so that leaves which read it return a deterministic
 * result instead of depending on whatever the register happened to hold.
 */
#ifdef _MSC_VER
static inline void getcpuid(int info_type, int info[4]) {
#if defined(_WIN64)
    // Win64 builds go through the __cpuid intrinsic from <intrin.h>.
    __cpuid(info, info_type);
#else
    __asm {
        mov    eax, [info_type]
        xor    ecx, ecx
        cpuid
        mov    edi, [info]
        mov    [edi], eax
        mov    [edi+4], ebx
        mov    [edi+8], ecx
        mov    [edi+12], edx
    }
#endif
}
#elif defined(__x86_64__)
static inline void getcpuid(int info_type, int info[4]) {
    asm volatile (
        "cpuid \n\t"
        : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#else
static inline void getcpuid(int info_type, int info[4]) {
    // EBX must survive this asm so the code stays compatible with -fPIC.
    // Park the caller's EBX in EDI across CPUID, then swap the two: EDI ends
    // up holding CPUID's EBX result and EBX gets its original value back.
    // The EBX output is pinned to EDI ("=D") rather than left as "=r": with a
    // free choice the compiler may select EBX itself, and restoring EBX would
    // then overwrite the value being returned. Not touching the stack inside
    // the asm also keeps any stack-relative operands valid.
    asm volatile (
        "movl %%ebx, %%edi \n\t"
        "cpuid \n\t"
        "xchgl %%ebx, %%edi \n\t"
        : "=a"(info[0]), "=D"(info[1]), "=c"(info[2]), "=d"(info[3])
        : "a"(info_type), "c"(0)
    );
}
#endif
68
69 ////////////////////////////////////////////////////////////////////////////////
70
71 /* Fetch the SIMD level directly from the CPU, at run-time.
72 * Only checks the levels needed by the optimizations in this file.
73 */
get_SIMD_level()74 static int* get_SIMD_level() {
75 int cpu_info[4] = { 0, 0, 0, 0 };
76 getcpuid(1, cpu_info);
77
78 int* level = new int;
79
80 if ((cpu_info[2] & (1<<20)) != 0) {
81 *level = SK_CPU_SSE_LEVEL_SSE42;
82 } else if ((cpu_info[2] & (1<<19)) != 0) {
83 *level = SK_CPU_SSE_LEVEL_SSE41;
84 } else if ((cpu_info[2] & (1<<9)) != 0) {
85 *level = SK_CPU_SSE_LEVEL_SSSE3;
86 } else if ((cpu_info[3] & (1<<26)) != 0) {
87 *level = SK_CPU_SSE_LEVEL_SSE2;
88 } else {
89 *level = 0;
90 }
91 return level;
92 }
93
94 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel);
95
96 /* Verify that the requested SIMD level is supported in the build.
97 * If not, check if the platform supports it.
98 */
supports_simd(int minLevel)99 static inline bool supports_simd(int minLevel) {
100 #if defined(SK_CPU_SSE_LEVEL)
101 if (minLevel <= SK_CPU_SSE_LEVEL) {
102 return true;
103 } else
104 #endif
105 {
106 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
107 /* For the Android framework we should always know at compile time if the device
108 * we are building for supports SSSE3. The one exception to this rule is on the
109 * emulator where we are compiled without the -mssse3 option (so we have no
110 * SSSE3 procs) but can be run on a host machine that supports SSSE3
111 * instructions. So for that particular case we disable our SSSE3 options.
112 */
113 return false;
114 #else
115 return minLevel <= *gSIMDLevel.get(get_SIMD_level);
116 #endif
117 }
118 }
119
120 ////////////////////////////////////////////////////////////////////////////////
121
PlatformConvolutionProcs(SkConvolutionProcs * procs)122 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
123 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
124 procs->fExtraHorizontalReads = 3;
125 procs->fConvolveVertically = &convolveVertically_SSE2;
126 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
127 procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
128 procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
129 }
130 }
131
132 ////////////////////////////////////////////////////////////////////////////////
133
platformProcs()134 void SkBitmapProcState::platformProcs() {
135 /* Every optimization in the function requires at least SSE2 */
136 if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
137 return;
138 }
139 const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3);
140
141 /* Check fSampleProc32 */
142 if (fSampleProc32 == S32_opaque_D32_filter_DX) {
143 if (ssse3) {
144 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
145 } else {
146 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
147 }
148 } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
149 if (ssse3) {
150 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
151 }
152 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
153 if (ssse3) {
154 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
155 } else {
156 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
157 }
158 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
159 if (ssse3) {
160 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
161 }
162 }
163
164 /* Check fMatrixProc */
165 if (fMatrixProc == ClampX_ClampY_filter_scale) {
166 fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
167 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
168 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
169 } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
170 fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
171 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
172 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
173 }
174 }
175
176 ////////////////////////////////////////////////////////////////////////////////
177
178 static const SkBlitRow::Proc16 platform_16_procs[] = {
179 S32_D565_Opaque_SSE2, // S32_D565_Opaque
180 nullptr, // S32_D565_Blend
181 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque
182 nullptr, // S32A_D565_Blend
183 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither
184 nullptr, // S32_D565_Blend_Dither
185 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither
186 nullptr, // S32A_D565_Blend_Dither
187 };
188
PlatformFactory565(unsigned flags)189 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
190 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
191 return platform_16_procs[flags];
192 } else {
193 return nullptr;
194 }
195 }
196
197 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = {
198 Color32A_D565_SSE2, // Color32A_D565,
199 nullptr, // Color32A_D565_Dither
200 };
201
PlatformColorFactory565(unsigned flags)202 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
203 /* If you're thinking about writing an SSE4 version of this, do check it's
204 * actually faster on Atom. Our original SSE4 version was slower than this
205 * SSE2 version on Silvermont, and only marginally faster on a Core i7,
206 * mainly due to the MULLD timings.
207 */
208 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
209 return platform_565_colorprocs_SSE2[flags];
210 } else {
211 return nullptr;
212 }
213 }
214
215 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
216 nullptr, // S32_Opaque,
217 S32_Blend_BlitRow32_SSE2, // S32_Blend,
218 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque
219 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
220 };
221
222 static const SkBlitRow::Proc32 platform_32_procs_SSE4[] = {
223 nullptr, // S32_Opaque,
224 S32_Blend_BlitRow32_SSE2, // S32_Blend,
225 S32A_Opaque_BlitRow32_SSE4, // S32A_Opaque
226 S32A_Blend_BlitRow32_SSE2, // S32A_Blend,
227 };
228
PlatformProcs32(unsigned flags)229 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
230 if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
231 return platform_32_procs_SSE4[flags];
232 } else
233 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
234 return platform_32_procs_SSE2[flags];
235 } else {
236 return nullptr;
237 }
238 }
239
240 ////////////////////////////////////////////////////////////////////////////////
241
PlatformBlitRowProcs16(bool isOpaque)242 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
243 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
244 if (isOpaque) {
245 return SkBlitLCD16OpaqueRow_SSE2;
246 } else {
247 return SkBlitLCD16Row_SSE2;
248 }
249 } else {
250 return nullptr;
251 }
252
253 }
254
PlatformRowProcs(SkColorType,SkMask::Format,RowFlags)255 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
256 return nullptr;
257 }
258