/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorPriv.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

// A debugging mode that helps prioritize porting stages to SkJumper.
#if 0
    #include "SkOnce.h"
    #include <atomic>

    #define M(st) {0},
    static std::atomic<int> gMissing[] = { SK_RASTER_PIPELINE_STAGES(M) };
    #undef M

    #define M(st) #st,
    static const char* gNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
    #undef M

    #define WHATS_NEXT
#endif
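// When the block above is enabled, the lookup tables below tally any stage they can't
// find in gMissing (see the WHATS_NEXT checks), and run_with_jumper() prints the counts
// next to gNames at process exit.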

// We'll use __has_feature(memory_sanitizer) to detect MSAN.
// SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
#if !defined(__has_feature)
    #define __has_feature(x) 0
#endif

// Stages expect these constants to be set to these values.
// It's fine to rearrange and add new ones if you update SkJumper_constants.
using K = const SkJumper_constants;
static K kConstants = {
    {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f},
};
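// (The iota table above is the offsets seed_shader adds to the starting x, giving each
// SIMD lane its own pixel.)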

#define STAGES(M)            \
    M(seed_shader)           \
    M(constant_color)        \
    M(clear)                 \
    M(plus_)                 \
    M(srcover)               \
    M(dstover)               \
    M(clamp_0)               \
    M(clamp_1)               \
    M(clamp_a)               \
    M(set_rgb)               \
    M(swap_rb)               \
    M(swap)                  \
    M(move_src_dst)          \
    M(move_dst_src)          \
    M(premul)                \
    M(unpremul)              \
    M(from_srgb)             \
    M(to_srgb)               \
    M(scale_1_float)         \
    M(scale_u8)              \
    M(lerp_1_float)          \
    M(lerp_u8)               \
    M(lerp_565)              \
    M(load_tables)           \
    M(load_a8)               \
    M(store_a8)              \
    M(load_565)              \
    M(store_565)             \
    M(load_8888)             \
    M(store_8888)            \
    M(load_f16)              \
    M(store_f16)             \
    M(store_f32)             \
    M(luminance_to_alpha)    \
    M(matrix_2x3)            \
    M(matrix_3x4)            \
    M(matrix_4x5)            \
    M(matrix_perspective)    \
    M(clamp_x)               \
    M(clamp_y)               \
    M(repeat_x)              \
    M(repeat_y)              \
    M(mirror_x)              \
    M(mirror_y)              \
    M(linear_gradient_2stops)
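
// STAGES is an X-macro: hand it a macro M and it expands M once per stage name.
// For example, further down,
//     #define M(st) StageFn sk_##st;
//         STAGES(M)
//     #undef M
// declares one portable StageFn per stage, sk_seed_shader through sk_linear_gradient_2stops.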

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest for us.
using StageFn = void(void);

// TODO: maybe don't need this wrapper anymore.
#define ASM(name, suffix) sk_##name##_##suffix
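// E.g. ASM(just_return, hsw) expands to sk_just_return_hsw.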

extern "C" {

#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    size_t ASM(start_pipeline,aarch64)(size_t, void**, K*, size_t);
    StageFn ASM(just_return,aarch64);
    #define M(st) StageFn ASM(st,aarch64);
        STAGES(M)
    #undef M

#elif defined(__arm__)
    size_t ASM(start_pipeline,vfp4)(size_t, void**, K*, size_t);
    StageFn ASM(just_return,vfp4);
    #define M(st) StageFn ASM(st,vfp4);
        STAGES(M)
    #undef M

#elif defined(__x86_64__) || defined(_M_X64)
    size_t ASM(start_pipeline,hsw  )(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,avx  )(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,sse41)(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,sse2 )(size_t, void**, K*, size_t);

    StageFn ASM(just_return,hsw),
            ASM(just_return,avx),
            ASM(just_return,sse41),
            ASM(just_return,sse2);

    #define M(st) StageFn ASM(st,hsw);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,avx);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,sse41);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,sse2);
        STAGES(M)
    #undef M
#endif

    // Portable, single-pixel stages.
    size_t sk_start_pipeline(size_t, void**, K*, size_t);
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        STAGES(M)
    #undef M
}

// Translate SkRasterPipeline's StockStage enum to StageFn function pointers.

#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    static StageFn* lookup_aarch64(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,aarch64);
            STAGES(M)
        #undef M
        }
    }

#elif defined(__arm__)
    static StageFn* lookup_vfp4(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,vfp4);
            STAGES(M)
        #undef M
        }
    }

#elif defined(__x86_64__) || defined(_M_X64)
    static StageFn* lookup_hsw(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
            #ifdef WHATS_NEXT
                gMissing[st]++;
            #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,hsw);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_avx(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
            #ifdef WHATS_NEXT
                gMissing[st]++;
            #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,avx);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_sse41(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
            #ifdef WHATS_NEXT
                gMissing[st]++;
            #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,sse41);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_sse2(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,sse2);
            STAGES(M)
        #undef M
        }
    }
#endif

static StageFn* lookup_portable(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
    #define M(st) case SkRasterPipeline::st: return sk_##st;
        STAGES(M)
    #undef M
    }
}

bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const {
#ifdef WHATS_NEXT
    static SkOnce once;
    once([] {
        atexit([] {
            for (int i = 0; i < (int)SK_ARRAY_COUNT(gMissing); i++) {
                SkDebugf("%10d %s\n", gMissing[i].load(), gNames[i]);
            }
        });
    });
#endif

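    // program[] holds one slot per stage for its function pointer, another for its
    // context pointer when it has one, and a final slot for just_return, so
    // 2*fStages.size() + 1 slots covers the worst case.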
    SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1);
    const size_t limit = x+n;

    auto build_and_run = [&](size_t min_stride,
                             StageFn* (*lookup)(SkRasterPipeline::StockStage),
                             StageFn* just_return,
                             size_t (*start_pipeline)(size_t, void**, K*, size_t)) {
        if (x + min_stride <= limit) {
            void** ip = program.get();
            for (auto&& st : fStages) {
                auto fn = lookup(st.stage);
                if (!fn) {
                    return false;
                }
                *ip++ = (void*)fn;
                if (st.ctx) {
                    *ip++ = st.ctx;
                }
            }
            *ip = (void*)just_return;

            x = start_pipeline(x, program.get(), &kConstants, limit);
        }
        return true;
    };
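    // build_and_run() reports false only when some stage has no implementation in that
    // instruction set's lookup table; otherwise it runs what it can (if at least
    // min_stride pixels remain) and updates x to wherever start_pipeline() left off.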

    // While possible, build and run at full vector stride.
#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    if (!build_and_run(4, lookup_aarch64, ASM(just_return,aarch64), ASM(start_pipeline,aarch64))) {
        return false;
    }

#elif defined(__arm__)
    if (1 && SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
        if (!build_and_run(2, lookup_vfp4, ASM(just_return,vfp4), ASM(start_pipeline,vfp4))) {
            return false;
        }
    }

#elif defined(__x86_64__) || defined(_M_X64)
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        if (!build_and_run(1, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        if (!build_and_run(1, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        if (!build_and_run(4, lookup_sse41, ASM(just_return,sse41), ASM(start_pipeline,sse41))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        if (!build_and_run(4, lookup_sse2, ASM(just_return,sse2), ASM(start_pipeline,sse2))) {
            return false;
        }
    }
#endif

    // Finish up any leftover with portable code one pixel at a time.
    return build_and_run(1, lookup_portable, sk_just_return, sk_start_pipeline);
}