1 /*
2 * Copyright 2016 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkCanvas.h"
9 #include "include/core/SkDeferredDisplayList.h"
10 #include "include/core/SkGraphics.h"
11 #include "include/core/SkPicture.h"
12 #include "include/core/SkPictureRecorder.h"
13 #include "include/core/SkStream.h"
14 #include "include/core/SkSurface.h"
15 #include "include/core/SkSurfaceProps.h"
16 #include "include/effects/SkPerlinNoiseShader.h"
17 #include "include/gpu/GrDirectContext.h"
18 #include "src/core/SkOSFile.h"
19 #include "src/core/SkTaskGroup.h"
20 #include "src/gpu/GrCaps.h"
21 #include "src/gpu/GrDirectContextPriv.h"
22 #include "src/gpu/SkGr.h"
23 #include "src/utils/SkMultiPictureDocument.h"
24 #include "src/utils/SkOSPath.h"
25 #include "tools/DDLPromiseImageHelper.h"
26 #include "tools/DDLTileHelper.h"
27 #include "tools/SkSharingProc.h"
28 #include "tools/ToolUtils.h"
29 #include "tools/flags/CommandLineFlags.h"
30 #include "tools/flags/CommonFlags.h"
31 #include "tools/flags/CommonFlagsConfig.h"
32 #include "tools/gpu/FlushFinishTracker.h"
33 #include "tools/gpu/GpuTimer.h"
34 #include "tools/gpu/GrContextFactory.h"
35
36 #ifdef SK_XML
37 #include "modules/svg/include/SkSVGDOM.h"
38 #include "src/xml/SkDOM.h"
39 #endif
40
#include <stdlib.h>
#include <algorithm>
#include <array>
#include <chrono>
#include <cinttypes>
#include <cmath>
#include <utility>
#include <vector>
48
49 /**
50 * This is a minimalist program whose sole purpose is to open a .skp or .svg file, benchmark it on a
51 * single config, and exit. It is intended to be used through skpbench.py rather than invoked
52 * directly. Limiting the entire process to a single config/skp pair helps to keep the results
53 * repeatable.
54 *
55 * No tiling, looping, or other fanciness is used; it just draws the skp whole into a size-matched
56 * render target and syncs the GPU after each draw.
57 *
 * Well, maybe a little fanciness: multi-frame SKPs (.mskp) can be loaded and played. The animation
 * is played as many times as necessary to reach the target sample duration, and FPS is reported.
60 *
61 * Currently, only GPU configs are supported.
62 */
63
64 static DEFINE_bool(ddl, false, "record the skp into DDLs before rendering");
65 static DEFINE_int(ddlNumRecordingThreads, 0, "number of DDL recording threads (0=num_cores)");
66 static DEFINE_int(ddlTilingWidthHeight, 0, "number of tiles along one edge when in DDL mode");
67
68 static DEFINE_bool(comparableDDL, false, "render in a way that is comparable to 'comparableSKP'");
69 static DEFINE_bool(comparableSKP, false, "report in a way that is comparable to 'comparableDDL'");
70
71 static DEFINE_int(duration, 5000, "number of milliseconds to run the benchmark");
72 static DEFINE_int(sampleMs, 50, "minimum duration of a sample");
73 static DEFINE_bool(gpuClock, false, "time on the gpu clock (gpu work only)");
74 static DEFINE_bool(fps, false, "use fps instead of ms");
75 static DEFINE_string(src, "",
76 "path to a single .skp or .svg file, or 'warmup' for a builtin warmup run");
77 static DEFINE_string(png, "", "if set, save a .png proof to disk at this file location");
78 static DEFINE_int(verbosity, 4, "level of verbosity (0=none to 5=debug)");
79 static DEFINE_bool(suppressHeader, false, "don't print a header row before the results");
80 static DEFINE_double(scale, 1, "Scale the size of the canvas and the zoom level by this factor.");
81 static DEFINE_bool(dumpSamples, false, "print the individual samples to stdout");
82
// Column titles printed by main() ahead of the result row (unless --suppressHeader is given).
static constexpr char header[] =
        " accum median max min stddev samples sample_ms clock metric config bench";

// printf() format of a single result row; print_result() supplies the arguments in this order.
static constexpr char resultFormat[] =
        "%8.4g %8.4g %8.4g %8.4g %6.3g%% %7zu %9i %-5s %-6s %-9s %s";

// Number of draw/flush cycles the benchmarks run before they start timing.
static constexpr int kNumFlushesToPrimeCache = 3;
90
91 struct Sample {
92 using duration = std::chrono::nanoseconds;
93
SampleSample94 Sample() : fFrames(0), fDuration(0) {}
secondsSample95 double seconds() const { return std::chrono::duration<double>(fDuration).count(); }
msSample96 double ms() const { return std::chrono::duration<double, std::milli>(fDuration).count(); }
valueSample97 double value() const { return FLAGS_fps ? fFrames / this->seconds() : this->ms() / fFrames; }
metricSample98 static const char* metric() { return FLAGS_fps ? "fps" : "ms"; }
99
100 int fFrames;
101 duration fDuration;
102 };
103
104 class GpuSync {
105 public:
GpuSync()106 GpuSync() {}
~GpuSync()107 ~GpuSync() {}
108
109 void waitIfNeeded();
110
111 sk_gpu_test::FlushFinishTracker* newFlushTracker(GrDirectContext* context);
112
113 private:
114 enum { kMaxFrameLag = 3 };
115 sk_sp<sk_gpu_test::FlushFinishTracker> fFinishTrackers[kMaxFrameLag - 1];
116 int fCurrentFlushIdx = 0;
117 };
118
// Process exit codes passed to exitf().
// NOTE(review): the numeric values look like the BSD sysexits.h codes (EX_USAGE, EX_DATAERR,
// EX_UNAVAILABLE, EX_SOFTWARE, EX_IOERR) - confirm before relying on that.
enum class ExitErr {
    kOk          = 0,
    kUsage       = 64,
    kData        = 65,
    kUnavailable = 69,
    kSoftware    = 70,
    kIO          = 74
};
127
128 static void flush_with_sync(GrDirectContext*, GpuSync&);
129 static void draw_skp_and_flush_with_sync(GrDirectContext*, SkSurface*, const SkPicture*, GpuSync&);
130 static sk_sp<SkPicture> create_warmup_skp();
131 static sk_sp<SkPicture> create_skp_from_svg(SkStream*, const char* filename);
132 static bool mkdir_p(const SkString& name);
133 static SkString join(const CommandLineFlags::StringArray&);
134 static void exitf(ExitErr, const char* format, ...);
135
136 // An interface used by both static SKPs and animated SKPs
137 class SkpProducer {
138 public:
~SkpProducer()139 virtual ~SkpProducer() {}
140 // Draw an SkPicture to the provided surface, flush the surface, and sync the GPU.
141 // You may use the static draw_skp_and_flush_with_sync declared above.
142 // returned int tells how many draw/flush/sync were done.
143 virtual int drawAndFlushAndSync(GrDirectContext*, SkSurface* surface, GpuSync& gpuSync) = 0;
144 };
145
146 class StaticSkp : public SkpProducer {
147 public:
StaticSkp(sk_sp<SkPicture> skp)148 StaticSkp(sk_sp<SkPicture> skp) : fSkp(skp) {}
149
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)150 int drawAndFlushAndSync(GrDirectContext* context,
151 SkSurface* surface,
152 GpuSync& gpuSync) override {
153 draw_skp_and_flush_with_sync(context, surface, fSkp.get(), gpuSync);
154 return 1;
155 }
156
157 private:
158 sk_sp<SkPicture> fSkp;
159 };
160
161 // A class for playing/benchmarking a multi frame SKP file.
162 // the recorded frames are looped over repeatedly.
163 // This type of benchmark may have a much higher std dev in frame times.
164 class MultiFrameSkp : public SkpProducer {
165 public:
MultiFrameSkp(const std::vector<SkDocumentPage> & frames)166 MultiFrameSkp(const std::vector<SkDocumentPage>& frames) : fFrames(frames){}
167
MakeFromFile(const SkString & path)168 static std::unique_ptr<MultiFrameSkp> MakeFromFile(const SkString& path) {
169 // Load the multi frame skp at the given filename.
170 std::unique_ptr<SkStreamAsset> stream = SkStream::MakeFromFile(path.c_str());
171 if (!stream) { return nullptr; }
172
173 // Attempt to deserialize with an image sharing serial proc.
174 auto deserialContext = std::make_unique<SkSharingDeserialContext>();
175 SkDeserialProcs procs;
176 procs.fImageProc = SkSharingDeserialContext::deserializeImage;
177 procs.fImageCtx = deserialContext.get();
178
179 // The outer format of multi-frame skps is the multi-picture document, which is a
180 // skp file containing subpictures separated by annotations.
181 int page_count = SkMultiPictureDocumentReadPageCount(stream.get());
182 if (!page_count) {
183 return nullptr;
184 }
185 std::vector<SkDocumentPage> frames(page_count); // can't call reserve, why?
186 if (!SkMultiPictureDocumentRead(stream.get(), frames.data(), page_count, &procs)) {
187 return nullptr;
188 }
189
190 return std::make_unique<MultiFrameSkp>(frames);
191 }
192
193 // Draw the whole animation once.
drawAndFlushAndSync(GrDirectContext * context,SkSurface * surface,GpuSync & gpuSync)194 int drawAndFlushAndSync(GrDirectContext* context,
195 SkSurface* surface,
196 GpuSync& gpuSync) override {
197 for (int i=0; i<this->count(); i++){
198 draw_skp_and_flush_with_sync(context, surface, this->frame(i).get(), gpuSync);
199 }
200 return this->count();
201 }
202 // Return the requested frame.
frame(int n) const203 sk_sp<SkPicture> frame(int n) const { return fFrames[n].fPicture; }
204 // Return the number of frames in the recording.
count() const205 int count() const { return fFrames.size(); }
206 private:
207 std::vector<SkDocumentPage> fFrames;
208 };
209
ddl_sample(GrDirectContext * dContext,DDLTileHelper * tiles,GpuSync & gpuSync,Sample * sample,SkTaskGroup * recordingTaskGroup,SkTaskGroup * gpuTaskGroup,std::chrono::high_resolution_clock::time_point * startStopTime,SkPicture * picture)210 static void ddl_sample(GrDirectContext* dContext, DDLTileHelper* tiles, GpuSync& gpuSync,
211 Sample* sample, SkTaskGroup* recordingTaskGroup, SkTaskGroup* gpuTaskGroup,
212 std::chrono::high_resolution_clock::time_point* startStopTime,
213 SkPicture* picture) {
214 using clock = std::chrono::high_resolution_clock;
215
216 clock::time_point start = *startStopTime;
217
218 if (FLAGS_comparableDDL) {
219 SkASSERT(!FLAGS_comparableSKP);
220
221 // In this mode we simply alternate between creating a DDL and drawing it - all on one
222 // thread. The interleaving is so that we don't starve the GPU.
223 // One unfortunate side effect of this is that we can't delete the DDLs until after
224 // the GPU work is flushed.
225 tiles->interleaveDDLCreationAndDraw(dContext, picture);
226 } else if (FLAGS_comparableSKP) {
227 // In this mode simply draw the re-inflated per-tile SKPs directly to the GPU w/o going
228 // through a DDL.
229 tiles->drawAllTilesDirectly(dContext, picture);
230 } else {
231 tiles->kickOffThreadedWork(recordingTaskGroup, gpuTaskGroup, dContext, picture);
232 recordingTaskGroup->wait();
233 }
234
235 if (gpuTaskGroup) {
236 gpuTaskGroup->add([&]{
237 flush_with_sync(dContext, gpuSync);
238 });
239 gpuTaskGroup->wait();
240 } else {
241 flush_with_sync(dContext, gpuSync);
242 }
243
244 *startStopTime = clock::now();
245
246 if (sample) {
247 sample->fDuration += *startStopTime - start;
248 sample->fFrames++;
249 }
250 }
251
run_ddl_benchmark(sk_gpu_test::TestContext * testContext,GrDirectContext * dContext,sk_sp<SkSurface> dstSurface,SkPicture * inputPicture,std::vector<Sample> * samples)252 static void run_ddl_benchmark(sk_gpu_test::TestContext* testContext, GrDirectContext *dContext,
253 sk_sp<SkSurface> dstSurface, SkPicture* inputPicture,
254 std::vector<Sample>* samples) {
255 using clock = std::chrono::high_resolution_clock;
256 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
257 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
258
259 SkSurfaceCharacterization dstCharacterization;
260 SkAssertResult(dstSurface->characterize(&dstCharacterization));
261
262 SkIRect viewport = dstSurface->imageInfo().bounds();
263
264 SkYUVAPixmapInfo::SupportedDataTypes supportedYUVADataTypes(*dContext);
265 DDLPromiseImageHelper promiseImageHelper(supportedYUVADataTypes);
266 sk_sp<SkPicture> newSKP = promiseImageHelper.recreateSKP(dContext, inputPicture);
267 if (!newSKP) {
268 exitf(ExitErr::kUnavailable, "DDL: conversion of skp failed");
269 }
270
271 promiseImageHelper.uploadAllToGPU(nullptr, dContext);
272
273 DDLTileHelper tiles(dContext, dstCharacterization, viewport,
274 FLAGS_ddlTilingWidthHeight, FLAGS_ddlTilingWidthHeight,
275 /* addRandomPaddingToDst */ false);
276
277 tiles.createBackendTextures(nullptr, dContext);
278
279 // In comparable modes, there is no GPU thread. The following pointers are all null.
280 // Otherwise, we transfer testContext onto the GPU thread until after the bench.
281 std::unique_ptr<SkExecutor> gpuThread;
282 std::unique_ptr<SkTaskGroup> gpuTaskGroup;
283 std::unique_ptr<SkExecutor> recordingThreadPool;
284 std::unique_ptr<SkTaskGroup> recordingTaskGroup;
285 if (!FLAGS_comparableDDL && !FLAGS_comparableSKP) {
286 gpuThread = SkExecutor::MakeFIFOThreadPool(1, false);
287 gpuTaskGroup = std::make_unique<SkTaskGroup>(*gpuThread);
288 recordingThreadPool = SkExecutor::MakeFIFOThreadPool(FLAGS_ddlNumRecordingThreads, false);
289 recordingTaskGroup = std::make_unique<SkTaskGroup>(*recordingThreadPool);
290 testContext->makeNotCurrent();
291 gpuTaskGroup->add([=]{ testContext->makeCurrent(); });
292 }
293
294 clock::time_point startStopTime = clock::now();
295
296 GpuSync gpuSync;
297 ddl_sample(dContext, &tiles, gpuSync, nullptr, recordingTaskGroup.get(),
298 gpuTaskGroup.get(), &startStopTime, newSKP.get());
299
300 clock::duration cumulativeDuration = std::chrono::milliseconds(0);
301
302 do {
303 samples->emplace_back();
304 Sample& sample = samples->back();
305
306 do {
307 tiles.resetAllTiles();
308 ddl_sample(dContext, &tiles, gpuSync, &sample, recordingTaskGroup.get(),
309 gpuTaskGroup.get(), &startStopTime, newSKP.get());
310 } while (sample.fDuration < sampleDuration);
311
312 cumulativeDuration += sample.fDuration;
313 } while (cumulativeDuration < benchDuration || 0 == samples->size() % 2);
314
315 // Move the context back to this thread now that we're done benching.
316 if (gpuTaskGroup) {
317 gpuTaskGroup->add([=]{
318 testContext->makeNotCurrent();
319 });
320 gpuTaskGroup->wait();
321 testContext->makeCurrent();
322 }
323
324 if (!FLAGS_png.isEmpty()) {
325 // The user wants to see the final result
326 dstSurface->draw(tiles.composeDDL());
327 dstSurface->flushAndSubmit();
328 }
329
330 tiles.resetAllTiles();
331
332 // Make sure the gpu has finished all its work before we exit this function and delete the
333 // fence.
334 dContext->flush();
335 dContext->submit(true);
336
337 promiseImageHelper.deleteAllFromGPU(nullptr, dContext);
338
339 tiles.deleteBackendTextures(nullptr, dContext);
340
341 }
342
run_benchmark(GrDirectContext * context,SkSurface * surface,SkpProducer * skpp,std::vector<Sample> * samples)343 static void run_benchmark(GrDirectContext* context, SkSurface* surface, SkpProducer* skpp,
344 std::vector<Sample>* samples) {
345 using clock = std::chrono::high_resolution_clock;
346 const Sample::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
347 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
348
349 GpuSync gpuSync;
350 int i = 0;
351 do {
352 i += skpp->drawAndFlushAndSync(context, surface, gpuSync);
353 } while(i < kNumFlushesToPrimeCache);
354
355 clock::time_point now = clock::now();
356 const clock::time_point endTime = now + benchDuration;
357
358 do {
359 clock::time_point sampleStart = now;
360 samples->emplace_back();
361 Sample& sample = samples->back();
362
363 do {
364 sample.fFrames += skpp->drawAndFlushAndSync(context, surface, gpuSync);
365 now = clock::now();
366 sample.fDuration = now - sampleStart;
367 } while (sample.fDuration < sampleDuration);
368 } while (now < endTime || 0 == samples->size() % 2);
369
370 // Make sure the gpu has finished all its work before we exit this function and delete the
371 // fence.
372 surface->flush();
373 context->submit(true);
374 }
375
run_gpu_time_benchmark(sk_gpu_test::GpuTimer * gpuTimer,GrDirectContext * context,SkSurface * surface,const SkPicture * skp,std::vector<Sample> * samples)376 static void run_gpu_time_benchmark(sk_gpu_test::GpuTimer* gpuTimer, GrDirectContext* context,
377 SkSurface* surface, const SkPicture* skp,
378 std::vector<Sample>* samples) {
379 using sk_gpu_test::PlatformTimerQuery;
380 using clock = std::chrono::steady_clock;
381 const clock::duration sampleDuration = std::chrono::milliseconds(FLAGS_sampleMs);
382 const clock::duration benchDuration = std::chrono::milliseconds(FLAGS_duration);
383
384 if (!gpuTimer->disjointSupport()) {
385 fprintf(stderr, "WARNING: GPU timer cannot detect disjoint operations; "
386 "results may be unreliable\n");
387 }
388
389 GpuSync gpuSync;
390 draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);
391
392 PlatformTimerQuery previousTime = 0;
393 for (int i = 1; i < kNumFlushesToPrimeCache; ++i) {
394 gpuTimer->queueStart();
395 draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);
396 previousTime = gpuTimer->queueStop();
397 }
398
399 clock::time_point now = clock::now();
400 const clock::time_point endTime = now + benchDuration;
401
402 do {
403 const clock::time_point sampleEndTime = now + sampleDuration;
404 samples->emplace_back();
405 Sample& sample = samples->back();
406
407 do {
408 gpuTimer->queueStart();
409 draw_skp_and_flush_with_sync(context, surface, skp, gpuSync);
410 PlatformTimerQuery time = gpuTimer->queueStop();
411
412 switch (gpuTimer->checkQueryStatus(previousTime)) {
413 using QueryStatus = sk_gpu_test::GpuTimer::QueryStatus;
414 case QueryStatus::kInvalid:
415 exitf(ExitErr::kUnavailable, "GPU timer failed");
416 break;
417 case QueryStatus::kPending:
418 exitf(ExitErr::kUnavailable, "timer query still not ready after fence sync");
419 break;
420 case QueryStatus::kDisjoint:
421 if (FLAGS_verbosity >= 4) {
422 fprintf(stderr, "discarding timer query due to disjoint operations.\n");
423 }
424 break;
425 case QueryStatus::kAccurate:
426 sample.fDuration += gpuTimer->getTimeElapsed(previousTime);
427 ++sample.fFrames;
428 break;
429 }
430 gpuTimer->deleteQuery(previousTime);
431 previousTime = time;
432 now = clock::now();
433 } while (now < sampleEndTime || 0 == sample.fFrames);
434 } while (now < endTime || 0 == samples->size() % 2);
435
436 gpuTimer->deleteQuery(previousTime);
437
438 // Make sure the gpu has finished all its work before we exit this function and delete the
439 // fence.
440 surface->flush();
441 context->submit(true);
442 }
443
print_result(const std::vector<Sample> & samples,const char * config,const char * bench)444 void print_result(const std::vector<Sample>& samples, const char* config, const char* bench) {
445 if (0 == (samples.size() % 2)) {
446 exitf(ExitErr::kSoftware, "attempted to gather stats on even number of samples");
447 }
448
449 if (FLAGS_dumpSamples) {
450 printf("Samples: ");
451 for (const Sample& sample : samples) {
452 printf("%" PRId64 " ", static_cast<int64_t>(sample.fDuration.count()));
453 }
454 printf("%s\n", bench);
455 }
456
457 Sample accum = Sample();
458 std::vector<double> values;
459 values.reserve(samples.size());
460 for (const Sample& sample : samples) {
461 accum.fFrames += sample.fFrames;
462 accum.fDuration += sample.fDuration;
463 values.push_back(sample.value());
464 }
465 std::sort(values.begin(), values.end());
466
467 const double accumValue = accum.value();
468 double variance = 0;
469 for (double value : values) {
470 const double delta = value - accumValue;
471 variance += delta * delta;
472 }
473 variance /= values.size();
474 // Technically, this is the relative standard deviation.
475 const double stddev = 100/*%*/ * sqrt(variance) / accumValue;
476
477 printf(resultFormat, accumValue, values[values.size() / 2], values.back(), values.front(),
478 stddev, values.size(), FLAGS_sampleMs, FLAGS_gpuClock ? "gpu" : "cpu", Sample::metric(),
479 config, bench);
480 printf("\n");
481 fflush(stdout);
482 }
483
main(int argc,char ** argv)484 int main(int argc, char** argv) {
485 CommandLineFlags::SetUsage(
486 "Use skpbench.py instead. "
487 "You usually don't want to use this program directly.");
488 CommandLineFlags::Parse(argc, argv);
489
490 if (!FLAGS_suppressHeader) {
491 printf("%s\n", header);
492 }
493 if (FLAGS_duration <= 0) {
494 exit(0); // This can be used to print the header and quit.
495 }
496
497 // Parse the config.
498 const SkCommandLineConfigGpu* config = nullptr; // Initialize for spurious warning.
499 SkCommandLineConfigArray configs;
500 ParseConfigs(FLAGS_config, &configs);
501 if (configs.count() != 1 || !(config = configs[0]->asConfigGpu())) {
502 exitf(ExitErr::kUsage, "invalid config '%s': must specify one (and only one) GPU config",
503 join(FLAGS_config).c_str());
504 }
505
506 // Parse the skp.
507 if (FLAGS_src.count() != 1) {
508 exitf(ExitErr::kUsage,
509 "invalid input '%s': must specify a single .skp or .svg file, or 'warmup'",
510 join(FLAGS_src).c_str());
511 }
512
513 SkGraphics::Init();
514
515 sk_sp<SkPicture> skp;
516 std::unique_ptr<MultiFrameSkp> mskp; // populated if the file is multi frame.
517 SkString srcname;
518 if (0 == strcmp(FLAGS_src[0], "warmup")) {
519 skp = create_warmup_skp();
520 srcname = "warmup";
521 } else {
522 SkString srcfile(FLAGS_src[0]);
523 std::unique_ptr<SkStream> srcstream(SkStream::MakeFromFile(srcfile.c_str()));
524 if (!srcstream) {
525 exitf(ExitErr::kIO, "failed to open file %s", srcfile.c_str());
526 }
527 if (srcfile.endsWith(".svg")) {
528 skp = create_skp_from_svg(srcstream.get(), srcfile.c_str());
529 } else if (srcfile.endsWith(".mskp")) {
530 mskp = MultiFrameSkp::MakeFromFile(srcfile);
531 // populate skp with it's first frame, for width height determination.
532 skp = mskp->frame(0);
533 } else {
534 skp = SkPicture::MakeFromStream(srcstream.get());
535 }
536 if (!skp) {
537 exitf(ExitErr::kData, "failed to parse file %s", srcfile.c_str());
538 }
539 srcname = SkOSPath::Basename(srcfile.c_str());
540 }
541 int width = std::min(SkScalarCeilToInt(skp->cullRect().width()), 2048),
542 height = std::min(SkScalarCeilToInt(skp->cullRect().height()), 2048);
543 if (FLAGS_verbosity >= 3 &&
544 (width != skp->cullRect().width() || height != skp->cullRect().height())) {
545 fprintf(stderr, "%s is too large (%ix%i), cropping to %ix%i.\n",
546 srcname.c_str(), SkScalarCeilToInt(skp->cullRect().width()),
547 SkScalarCeilToInt(skp->cullRect().height()), width, height);
548 }
549 if (FLAGS_scale != 1) {
550 width *= FLAGS_scale;
551 height *= FLAGS_scale;
552 if (FLAGS_verbosity >= 3) {
553 fprintf(stderr, "Scale factor of %.2f: scaling to %ix%i.\n",
554 FLAGS_scale, width, height);
555 }
556 }
557
558 if (config->getSurfType() != SkCommandLineConfigGpu::SurfType::kDefault) {
559 exitf(ExitErr::kUnavailable, "This tool only supports the default surface type. (%s)",
560 config->getTag().c_str());
561 }
562
563 // Create a context.
564 GrContextOptions ctxOptions;
565 SetCtxOptionsFromCommonFlags(&ctxOptions);
566 sk_gpu_test::GrContextFactory factory(ctxOptions);
567 sk_gpu_test::ContextInfo ctxInfo =
568 factory.getContextInfo(config->getContextType(), config->getContextOverrides());
569 auto ctx = ctxInfo.directContext();
570 if (!ctx) {
571 exitf(ExitErr::kUnavailable, "failed to create context for config %s",
572 config->getTag().c_str());
573 }
574 if (ctx->maxRenderTargetSize() < std::max(width, height)) {
575 exitf(ExitErr::kUnavailable, "render target size %ix%i not supported by platform (max: %i)",
576 width, height, ctx->maxRenderTargetSize());
577 }
578 GrBackendFormat format = ctx->defaultBackendFormat(config->getColorType(), GrRenderable::kYes);
579 if (!format.isValid()) {
580 exitf(ExitErr::kUnavailable, "failed to get GrBackendFormat from SkColorType: %d",
581 config->getColorType());
582 }
583 int supportedSampleCount = ctx->priv().caps()->getRenderTargetSampleCount(
584 config->getSamples(), format);
585 if (supportedSampleCount != config->getSamples()) {
586 exitf(ExitErr::kUnavailable, "sample count %i not supported by platform",
587 config->getSamples());
588 }
589 sk_gpu_test::TestContext* testCtx = ctxInfo.testContext();
590 if (!testCtx) {
591 exitf(ExitErr::kSoftware, "testContext is null");
592 }
593 if (!testCtx->fenceSyncSupport()) {
594 exitf(ExitErr::kUnavailable, "GPU does not support fence sync");
595 }
596
597 // Create a render target.
598 SkImageInfo info =
599 SkImageInfo::Make(width, height, config->getColorType(), config->getAlphaType(),
600 sk_ref_sp(config->getColorSpace()));
601 SkSurfaceProps props(config->getSurfaceFlags(), kRGB_H_SkPixelGeometry);
602 sk_sp<SkSurface> surface =
603 SkSurface::MakeRenderTarget(ctx, SkBudgeted::kNo, info, config->getSamples(), &props);
604 if (!surface) {
605 exitf(ExitErr::kUnavailable, "failed to create %ix%i render target for config %s",
606 width, height, config->getTag().c_str());
607 }
608
609 // Run the benchmark.
610 std::vector<Sample> samples;
611 if (FLAGS_sampleMs > 0) {
612 // +1 because we might take one more sample in order to have an odd number.
613 samples.reserve(1 + (FLAGS_duration + FLAGS_sampleMs - 1) / FLAGS_sampleMs);
614 } else {
615 samples.reserve(2 * FLAGS_duration);
616 }
617 SkCanvas* canvas = surface->getCanvas();
618 canvas->translate(-skp->cullRect().x(), -skp->cullRect().y());
619 if (FLAGS_scale != 1) {
620 canvas->scale(FLAGS_scale, FLAGS_scale);
621 }
622 if (!FLAGS_gpuClock) {
623 if (FLAGS_ddl) {
624 run_ddl_benchmark(testCtx, ctx, surface, skp.get(), &samples);
625 } else if (!mskp) {
626 auto s = std::make_unique<StaticSkp>(skp);
627 run_benchmark(ctx, surface.get(), s.get(), &samples);
628 } else {
629 run_benchmark(ctx, surface.get(), mskp.get(), &samples);
630 }
631 } else {
632 if (FLAGS_ddl) {
633 exitf(ExitErr::kUnavailable, "DDL: GPU-only timing not supported");
634 }
635 if (!testCtx->gpuTimingSupport()) {
636 exitf(ExitErr::kUnavailable, "GPU does not support timing");
637 }
638 run_gpu_time_benchmark(testCtx->gpuTimer(), ctx, surface.get(), skp.get(), &samples);
639 }
640 print_result(samples, config->getTag().c_str(), srcname.c_str());
641
642 // Save a proof (if one was requested).
643 if (!FLAGS_png.isEmpty()) {
644 SkBitmap bmp;
645 bmp.allocPixels(info);
646 if (!surface->getCanvas()->readPixels(bmp, 0, 0)) {
647 exitf(ExitErr::kUnavailable, "failed to read canvas pixels for png");
648 }
649 if (!mkdir_p(SkOSPath::Dirname(FLAGS_png[0]))) {
650 exitf(ExitErr::kIO, "failed to create directory for png \"%s\"", FLAGS_png[0]);
651 }
652 if (!ToolUtils::EncodeImageToFile(FLAGS_png[0], bmp, SkEncodedImageFormat::kPNG, 100)) {
653 exitf(ExitErr::kIO, "failed to save png to \"%s\"", FLAGS_png[0]);
654 }
655 }
656
657 return(0);
658 }
659
flush_with_sync(GrDirectContext * context,GpuSync & gpuSync)660 static void flush_with_sync(GrDirectContext* context, GpuSync& gpuSync) {
661 gpuSync.waitIfNeeded();
662
663 GrFlushInfo flushInfo;
664 flushInfo.fFinishedProc = sk_gpu_test::FlushFinishTracker::FlushFinished;
665 flushInfo.fFinishedContext = gpuSync.newFlushTracker(context);
666
667 context->flush(flushInfo);
668 context->submit();
669 }
670
draw_skp_and_flush_with_sync(GrDirectContext * context,SkSurface * surface,const SkPicture * skp,GpuSync & gpuSync)671 static void draw_skp_and_flush_with_sync(GrDirectContext* context, SkSurface* surface,
672 const SkPicture* skp, GpuSync& gpuSync) {
673 auto canvas = surface->getCanvas();
674 canvas->drawPicture(skp);
675
676 flush_with_sync(context, gpuSync);
677 }
678
create_warmup_skp()679 static sk_sp<SkPicture> create_warmup_skp() {
680 static constexpr SkRect bounds{0, 0, 500, 500};
681 SkPictureRecorder recorder;
682 SkCanvas* recording = recorder.beginRecording(bounds);
683
684 recording->clear(SK_ColorWHITE);
685
686 SkPaint stroke;
687 stroke.setStyle(SkPaint::kStroke_Style);
688 stroke.setStrokeWidth(2);
689
690 // Use a big path to (theoretically) warmup the CPU.
691 SkPath bigPath = ToolUtils::make_big_path();
692 recording->drawPath(bigPath, stroke);
693
694 // Use a perlin shader to warmup the GPU.
695 SkPaint perlin;
696 perlin.setShader(SkPerlinNoiseShader::MakeTurbulence(0.1f, 0.1f, 1, 0, nullptr));
697 recording->drawRect(bounds, perlin);
698
699 return recorder.finishRecordingAsPicture();
700 }
701
create_skp_from_svg(SkStream * stream,const char * filename)702 static sk_sp<SkPicture> create_skp_from_svg(SkStream* stream, const char* filename) {
703 #ifdef SK_XML
704 sk_sp<SkSVGDOM> svg = SkSVGDOM::MakeFromStream(*stream);
705 if (!svg) {
706 exitf(ExitErr::kData, "failed to build svg dom from file %s", filename);
707 }
708
709 static constexpr SkRect bounds{0, 0, 1200, 1200};
710 SkPictureRecorder recorder;
711 SkCanvas* recording = recorder.beginRecording(bounds);
712
713 svg->setContainerSize(SkSize::Make(recording->getBaseLayerSize()));
714 svg->render(recording);
715
716 return recorder.finishRecordingAsPicture();
717 #endif
718 exitf(ExitErr::kData, "SK_XML is disabled; cannot open svg file %s", filename);
719 return nullptr;
720 }
721
mkdir_p(const SkString & dirname)722 bool mkdir_p(const SkString& dirname) {
723 if (dirname.isEmpty() || dirname == SkString("/")) {
724 return true;
725 }
726 return mkdir_p(SkOSPath::Dirname(dirname.c_str())) && sk_mkdir(dirname.c_str());
727 }
728
join(const CommandLineFlags::StringArray & stringArray)729 static SkString join(const CommandLineFlags::StringArray& stringArray) {
730 SkString joined;
731 for (int i = 0; i < stringArray.count(); ++i) {
732 joined.appendf(i ? " %s" : "%s", stringArray[i]);
733 }
734 return joined;
735 }
736
exitf(ExitErr err,const char * format,...)737 static void exitf(ExitErr err, const char* format, ...) {
738 fprintf(stderr, ExitErr::kSoftware == err ? "INTERNAL ERROR: " : "ERROR: ");
739 va_list args;
740 va_start(args, format);
741 vfprintf(stderr, format, args);
742 va_end(args);
743 fprintf(stderr, ExitErr::kSoftware == err ? "; this should never happen.\n": ".\n");
744 exit((int)err);
745 }
746
waitIfNeeded()747 void GpuSync::waitIfNeeded() {
748 if (fFinishTrackers[fCurrentFlushIdx]) {
749 fFinishTrackers[fCurrentFlushIdx]->waitTillFinished();
750 }
751 }
752
newFlushTracker(GrDirectContext * context)753 sk_gpu_test::FlushFinishTracker* GpuSync::newFlushTracker(GrDirectContext* context) {
754 fFinishTrackers[fCurrentFlushIdx].reset(new sk_gpu_test::FlushFinishTracker(context));
755
756 sk_gpu_test::FlushFinishTracker* tracker = fFinishTrackers[fCurrentFlushIdx].get();
757 // We add an additional ref to the current flush tracker here. This ref is owned by the finish
758 // callback on the flush call. The finish callback will unref the tracker when called.
759 tracker->ref();
760
761 fCurrentFlushIdx = (fCurrentFlushIdx + 1) % SK_ARRAY_COUNT(fFinishTrackers);
762 return tracker;
763 }
764