/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkColorPriv.h"
#include "SkCpu.h"
#include "SkJumper.h"
#include "SkRasterPipeline.h"
#include "SkTemplates.h"

// A debugging mode that helps prioritize porting stages to SkJumper.
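// When enabled (flip the #if below to 1), some of the lookup_*() functions below tally each
// stage they can't find in gMissing, and run_with_jumper() prints those counts at process exit.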
#if 0
    #include "SkOnce.h"
    #include <atomic>

    #define M(st) {0},
    static std::atomic<int> gMissing[] = { SK_RASTER_PIPELINE_STAGES(M) };
    #undef M

    #define M(st) #st,
    static const char* gNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
    #undef M

    #define WHATS_NEXT
#endif

// We'll use __has_feature(memory_sanitizer) to detect MSAN.
// SkJumper_generated.S is not compiled with MSAN, so MSAN would yell really loud.
#if !defined(__has_feature)
    #define __has_feature(x) 0
#endif

// Stages expect these constants to be set to these values.
// It's fine to rearrange and add new ones if you update SkJumper_constants.
using K = const SkJumper_constants;
static K kConstants = {
    {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f},
};

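// STAGES() is an X-macro: it lists every stage ported to SkJumper once, and is expanded
// below with different definitions of M() to declare and look up each instruction set's
// sk_<stage>_<suffix> symbols.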
#define STAGES(M)         \
    M(seed_shader)        \
    M(constant_color)     \
    M(clear)              \
    M(plus_)              \
    M(srcover)            \
    M(dstover)            \
    M(clamp_0)            \
    M(clamp_1)            \
    M(clamp_a)            \
    M(set_rgb)            \
    M(swap_rb)            \
    M(swap)               \
    M(move_src_dst)       \
    M(move_dst_src)       \
    M(premul)             \
    M(unpremul)           \
    M(from_srgb)          \
    M(to_srgb)            \
    M(scale_1_float)      \
    M(scale_u8)           \
    M(lerp_1_float)       \
    M(lerp_u8)            \
    M(lerp_565)           \
    M(load_tables)        \
    M(load_a8)            \
    M(store_a8)           \
    M(load_565)           \
    M(store_565)          \
    M(load_8888)          \
    M(store_8888)         \
    M(load_f16)           \
    M(store_f16)          \
    M(store_f32)          \
    M(luminance_to_alpha) \
    M(matrix_2x3)         \
    M(matrix_3x4)         \
    M(matrix_4x5)         \
    M(matrix_perspective) \
    M(clamp_x)            \
    M(clamp_y)            \
    M(repeat_x)           \
    M(repeat_y)           \
    M(mirror_x)           \
    M(mirror_y)           \
    M(linear_gradient_2stops)

// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest for us.
using StageFn = void(void);

// TODO: maybe don't need this wrapper anymore.
#define ASM(name, suffix) sk_##name##_##suffix
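// e.g. ASM(srcover, hsw) expands to the symbol name sk_srcover_hsw.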

extern "C" {

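// start_pipeline() is called below as x = start_pipeline(x, program, &kConstants, limit);
// it runs the program over pixels in [x,limit) at its vector stride and returns the x it
// stopped at, so a narrower or portable run can finish whatever is left.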
#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    size_t ASM(start_pipeline,aarch64)(size_t, void**, K*, size_t);
    StageFn ASM(just_return,aarch64);
    #define M(st) StageFn ASM(st,aarch64);
        STAGES(M)
    #undef M

#elif defined(__arm__)
    size_t ASM(start_pipeline,vfp4)(size_t, void**, K*, size_t);
    StageFn ASM(just_return,vfp4);
    #define M(st) StageFn ASM(st,vfp4);
        STAGES(M)
    #undef M

#elif defined(__x86_64__) || defined(_M_X64)
    size_t ASM(start_pipeline,hsw  )(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,avx  )(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,sse41)(size_t, void**, K*, size_t);
    size_t ASM(start_pipeline,sse2 )(size_t, void**, K*, size_t);

    StageFn ASM(just_return,hsw),
            ASM(just_return,avx),
            ASM(just_return,sse41),
            ASM(just_return,sse2);

    #define M(st) StageFn ASM(st,hsw);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,avx);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,sse41);
        STAGES(M)
    #undef M
    #define M(st) StageFn ASM(st,sse2);
        STAGES(M)
    #undef M
#endif

    // Portable, single-pixel stages.
    size_t sk_start_pipeline(size_t, void**, K*, size_t);
    StageFn sk_just_return;
    #define M(st) StageFn sk_##st;
        STAGES(M)
    #undef M
}

// Translate SkRasterPipeline's StockStage enum to StageFn function pointers.
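// Each lookup_*() returns nullptr for stages not yet ported to its instruction set, which
// makes build_and_run() below bail out and run_with_jumper() report failure to its caller.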

#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    static StageFn* lookup_aarch64(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,aarch64);
            STAGES(M)
        #undef M
        }
    }

#elif defined(__arm__)
    static StageFn* lookup_vfp4(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,vfp4);
            STAGES(M)
        #undef M
        }
    }

#elif defined(__x86_64__) || defined(_M_X64)
    static StageFn* lookup_hsw(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
        #ifdef WHATS_NEXT
                gMissing[st]++;
        #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,hsw);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_avx(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
        #ifdef WHATS_NEXT
                gMissing[st]++;
        #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,avx);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_sse41(SkRasterPipeline::StockStage st) {
        switch (st) {
            default:
        #ifdef WHATS_NEXT
                gMissing[st]++;
        #endif
                return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,sse41);
            STAGES(M)
        #undef M
        }
    }
    static StageFn* lookup_sse2(SkRasterPipeline::StockStage st) {
        switch (st) {
            default: return nullptr;
        #define M(st) case SkRasterPipeline::st: return ASM(st,sse2);
            STAGES(M)
        #undef M
        }
    }
#endif

static StageFn* lookup_portable(SkRasterPipeline::StockStage st) {
    switch (st) {
        default: return nullptr;
    #define M(st) case SkRasterPipeline::st: return sk_##st;
        STAGES(M)
    #undef M
    }
}

bool SkRasterPipeline::run_with_jumper(size_t x, size_t n) const {
#ifdef WHATS_NEXT
    static SkOnce once;
    once([] {
        atexit([] {
            for (int i = 0; i < (int)SK_ARRAY_COUNT(gMissing); i++) {
                SkDebugf("%10d %s\n", gMissing[i].load(), gNames[i]);
            }
        });
    });
#endif

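    // The program is a flat array of pointers: each stage contributes its function pointer
    // and, if it has one, its context pointer, and the whole thing ends with just_return.
    // Worst case that's 2*fStages.size() + 1 pointers.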
    SkAutoSTMalloc<64, void*> program(2*fStages.size() + 1);
    const size_t limit = x+n;

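    // build_and_run() builds the program for one lookup table and runs it if at least
    // min_stride pixels remain; it returns false only when some stage is missing from that
    // table.  As a hypothetical example, a pipeline of { seed_shader, constant_color (with a
    // context), srcover } built with lookup_hsw would lay out as
    //   { sk_seed_shader_hsw, sk_constant_color_hsw, ctx, sk_srcover_hsw, sk_just_return_hsw }.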
    auto build_and_run = [&](size_t   min_stride,
                             StageFn* (*lookup)(SkRasterPipeline::StockStage),
                             StageFn* just_return,
                             size_t   (*start_pipeline)(size_t, void**, K*, size_t)) {
        if (x + min_stride <= limit) {
            void** ip = program.get();
            for (auto&& st : fStages) {
                auto fn = lookup(st.stage);
                if (!fn) {
                    return false;
                }
                *ip++ = (void*)fn;
                if (st.ctx) {
                    *ip++ = st.ctx;
                }
            }
            *ip = (void*)just_return;

            x = start_pipeline(x, program.get(), &kConstants, limit);
        }
        return true;
    };

    // While possible, build and run at full vector stride.
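    // Each run advances x as far as its stride allows; anything it can't cover falls through
    // to the next candidate below it, and finally to the portable one-pixel-at-a-time code.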
#if __has_feature(memory_sanitizer)
    // We'll just run portable code.

#elif defined(__aarch64__)
    if (!build_and_run(4, lookup_aarch64, ASM(just_return,aarch64), ASM(start_pipeline,aarch64))) {
        return false;
    }

#elif defined(__arm__)
    if (1 && SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
        if (!build_and_run(2, lookup_vfp4, ASM(just_return,vfp4), ASM(start_pipeline,vfp4))) {
            return false;
        }
    }

#elif defined(__x86_64__) || defined(_M_X64)
    if (1 && SkCpu::Supports(SkCpu::HSW)) {
        if (!build_and_run(1, lookup_hsw, ASM(just_return,hsw), ASM(start_pipeline,hsw))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::AVX)) {
        if (!build_and_run(1, lookup_avx, ASM(just_return,avx), ASM(start_pipeline,avx))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::SSE41)) {
        if (!build_and_run(4, lookup_sse41, ASM(just_return,sse41), ASM(start_pipeline,sse41))) {
            return false;
        }
    }
    if (1 && SkCpu::Supports(SkCpu::SSE2)) {
        if (!build_and_run(4, lookup_sse2, ASM(just_return,sse2), ASM(start_pipeline,sse2))) {
            return false;
        }
    }
#endif

    // Finish up any leftover pixels with portable code, one at a time.
    return build_and_run(1, lookup_portable, sk_just_return, sk_start_pipeline);
}