1 /*
2  * Copyright 2015 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
#include "Benchmark.h"
#include "SkCanvas.h"
#include "SkImageEncoder.h"

#include <cstddef>
#include <vector>
11 
12 #if SK_SUPPORT_GPU
13 #include "GLBench.h"
14 #include "GrShaderCaps.h"
15 #include "GrShaderVar.h"
16 #include "gl/GrGLContext.h"
17 #include "gl/GrGLInterface.h"
18 #include "gl/GrGLUtil.h"
19 #include "../private/GrGLSL.h"
20 
/*
 * This is a native GL benchmark for instanced arrays vs vertex buffer objects.  To benchmark this
 * functionality, we draw loops * kDrawMultiplier triangles per run.  If this number is no larger
 * than kNumTri then we do a single draw, either with instances or with drawArrays.  Otherwise we
 * issue multiple draws.
 *
 * Additionally, there is a divisor which, if > 0, acts as a multiplier for the number of draws
 * issued (each draw then submits kDrawMultiplier / divisor triangles).
 */
30 
31 class GLCpuPosInstancedArraysBench : public GLBench {
32 public:
33     /*
34      * Clients can decide to use either:
35      * kUseOne_VboSetup      - one vertex buffer with colors and positions interleaved
36      * kUseTwo_VboSetup      - two vertex buffers, one for colors, one for positions
37      * kUseInstance_VboSetup - two vertex buffers, one with per vertex indices, one with per
38      *                         instance colors
39      */
40     enum VboSetup {
41         kUseOne_VboSetup,
42         kUseTwo_VboSetup,
43         kUseInstance_VboSetup,
44     };
45 
46     /*
47      * drawDiv will act as a multiplier for the number of draws we issue if > 0. ie, 2 will issue
48      * 2x as many draws, 4 will issue 4x as many draws etc.  There is a limit however, which is
49      * kDrawMultipier.
50      */
GLCpuPosInstancedArraysBench(VboSetup vboSetup,int32_t drawDiv)51     GLCpuPosInstancedArraysBench(VboSetup vboSetup, int32_t drawDiv)
52         : fVboSetup(vboSetup)
53         , fDrawDiv(drawDiv)
54         , fProgram(0)
55         , fVAO(0) {
56         fName = VboSetupToStr(vboSetup, fDrawDiv);
57     }
58 
59 protected:
onGetName()60     const char* onGetName() override {
61         return fName.c_str();
62     }
63 
64     const GrGLContext* onGetGLContext(const GrGLContext*) override;
65     void setup(const GrGLContext*) override;
66     void glDraw(int loops, const GrGLContext*) override;
67     void teardown(const GrGLInterface*) override;
68 
69 private:
70     void setupInstanceVbo(const GrGLInterface*, const SkMatrix*);
71     void setupDoubleVbo(const GrGLInterface*, const SkMatrix*);
72     void setupSingleVbo(const GrGLInterface*, const SkMatrix*);
73     GrGLuint setupShader(const GrGLContext*);
74 
VboSetupToStr(VboSetup vboSetup,uint32_t drawDiv)75     static SkString VboSetupToStr(VboSetup vboSetup, uint32_t drawDiv) {
76         SkString name("GLInstancedArraysBench");
77         switch (vboSetup) {
78             default:
79             case kUseOne_VboSetup:
80                 name.appendf("_one_%u", drawDiv);
81                 break;
82             case kUseTwo_VboSetup:
83                 name.appendf("_two_%u", drawDiv);
84                 break;
85             case kUseInstance_VboSetup:
86                 name.append("_instance");
87                 break;
88         }
89         return name;
90     }
91 
92     static const GrGLuint kScreenWidth = 800;
93     static const GrGLuint kScreenHeight = 600;
94     static const uint32_t kNumTri = 10000;
95     static const uint32_t kVerticesPerTri = 3;
96     static const uint32_t kDrawMultiplier = 512;
97 
98     SkString fName;
99     VboSetup fVboSetup;
100     uint32_t fDrawDiv;
101     SkTArray<GrGLuint> fBuffers;
102     GrGLuint fProgram;
103     GrGLuint fVAO;
104     GrGLuint fTexture;
105 };
106 
107 ///////////////////////////////////////////////////////////////////////////////////////////////////
108 
setupShader(const GrGLContext * ctx)109 GrGLuint GLCpuPosInstancedArraysBench::setupShader(const GrGLContext* ctx) {
110     const GrShaderCaps* shaderCaps = ctx->caps()->shaderCaps();
111     const char* version = shaderCaps->versionDeclString();
112 
113     // setup vertex shader
114     GrShaderVar aPosition("a_position", kVec2f_GrSLType, GrShaderVar::kIn_TypeModifier);
115     GrShaderVar aColor("a_color", kVec3f_GrSLType, GrShaderVar::kIn_TypeModifier);
116     GrShaderVar oColor("o_color", kVec3f_GrSLType, GrShaderVar::kOut_TypeModifier);
117 
118     SkString vshaderTxt(version);
119     aPosition.appendDecl(shaderCaps, &vshaderTxt);
120     vshaderTxt.append(";\n");
121     aColor.appendDecl(shaderCaps, &vshaderTxt);
122     vshaderTxt.append(";\n");
123     oColor.appendDecl(shaderCaps, &vshaderTxt);
124     vshaderTxt.append(";\n");
125 
126     vshaderTxt.append(
127             "void main()\n"
128             "{\n"
129                 "gl_Position = vec4(a_position, 0., 1.);\n"
130                 "o_color = a_color;\n"
131             "}\n");
132 
133     // setup fragment shader
134     GrShaderVar oFragColor("o_FragColor", kVec4f_GrSLType, GrShaderVar::kOut_TypeModifier);
135     SkString fshaderTxt(version);
136     GrGLSLAppendDefaultFloatPrecisionDeclaration(kDefault_GrSLPrecision, *shaderCaps, &fshaderTxt);
137     oColor.setTypeModifier(GrShaderVar::kIn_TypeModifier);
138     oColor.appendDecl(shaderCaps, &fshaderTxt);
139     fshaderTxt.append(";\n");
140 
141     const char* fsOutName;
142     if (shaderCaps->mustDeclareFragmentShaderOutput()) {
143         oFragColor.appendDecl(shaderCaps, &fshaderTxt);
144         fshaderTxt.append(";\n");
145         fsOutName = oFragColor.c_str();
146     } else {
147         fsOutName = "gl_FragColor";
148     }
149 
150     fshaderTxt.appendf(
151             "void main()\n"
152             "{\n"
153                 "%s = vec4(o_color, 1.0);\n"
154             "}\n", fsOutName);
155 
156     return CreateProgram(ctx, vshaderTxt.c_str(), fshaderTxt.c_str());
157 }
158 
159 template<typename Func>
setup_matrices(int numQuads,Func f)160 static void setup_matrices(int numQuads, Func f) {
161     // We draw a really small triangle so we are not fill rate limited
162     for (int i = 0 ; i < numQuads; i++) {
163         SkMatrix m = SkMatrix::I();
164         m.setScale(0.0001f, 0.0001f);
165         f(m);
166     }
167 }
168 
169 ///////////////////////////////////////////////////////////////////////////////////////////////////
170 
/*
 * Accepts the context only when its interface exposes DrawArraysInstanced; returns nullptr
 * otherwise.
 */
const GrGLContext* GLCpuPosInstancedArraysBench::onGetGLContext(const GrGLContext* ctx) {
    return ctx->interface()->fFunctions.fDrawArraysInstanced ? ctx : nullptr;
}
178 
setupInstanceVbo(const GrGLInterface * gl,const SkMatrix * viewMatrices)179 void GLCpuPosInstancedArraysBench::setupInstanceVbo(const GrGLInterface* gl,
180                                                     const SkMatrix* viewMatrices) {
181     // We draw all of the instances at a single place because we aren't allowed to have per vertex
182     // per instance attributes
183     SkPoint positions[kVerticesPerTri];
184     positions[0].set(-1.0f, -1.0f);
185     positions[1].set( 1.0f, -1.0f);
186     positions[2].set( 1.0f,  1.0f);
187     viewMatrices[0].mapPointsWithStride(positions, sizeof(SkPoint), kVerticesPerTri);
188 
189     // setup colors so we can detect we are actually drawing instances(the last triangle will be
190     // a different color)
191     GrGLfloat colors[kVerticesPerTri * kNumTri];
192     for (uint32_t i = 0; i < kNumTri; i++) {
193         // set colors
194         uint32_t offset = i * kVerticesPerTri;
195         float color = i == kNumTri - 1 ? 1.0f : 0.0f;
196         colors[offset++] = color; colors[offset++] = 0.0f; colors[offset++] = 0.0f;
197     }
198 
199     GrGLuint posVBO;
200     // setup position VBO
201     GR_GL_CALL(gl, GenBuffers(1, &posVBO));
202     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, posVBO));
203     GR_GL_CALL(gl, BufferData(GR_GL_ARRAY_BUFFER, sizeof(positions), positions, GR_GL_STATIC_DRAW));
204     GR_GL_CALL(gl, EnableVertexAttribArray(0));
205     GR_GL_CALL(gl, VertexAttribPointer(0, 2, GR_GL_FLOAT, GR_GL_FALSE, 2 * sizeof(GrGLfloat),
206                                        (GrGLvoid*)0));
207 
208     // setup color VBO
209     GrGLuint instanceVBO;
210     GR_GL_CALL(gl, GenBuffers(1, &instanceVBO));
211     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, instanceVBO));
212     GR_GL_CALL(gl, BufferData(GR_GL_ARRAY_BUFFER, sizeof(colors), colors, GR_GL_STATIC_DRAW));
213     GR_GL_CALL(gl, EnableVertexAttribArray(1));
214     GR_GL_CALL(gl, VertexAttribPointer(1, 3, GR_GL_FLOAT, GR_GL_FALSE, 3 * sizeof(GrGLfloat),
215                                        (GrGLvoid*)0));
216     GR_GL_CALL(gl, VertexAttribDivisor(1, 1));
217     fBuffers.push_back(posVBO);
218     fBuffers.push_back(instanceVBO);
219 }
220 
setupDoubleVbo(const GrGLInterface * gl,const SkMatrix * viewMatrices)221 void GLCpuPosInstancedArraysBench::setupDoubleVbo(const GrGLInterface* gl,
222                                                   const SkMatrix* viewMatrices) {
223     // Constants for our various shader programs
224     SkPoint positions[kVerticesPerTri * kNumTri];
225     GrGLfloat colors[kVerticesPerTri * kNumTri * 3];
226     for (uint32_t i = 0; i < kNumTri; i++) {
227         SkPoint* position = &positions[i * kVerticesPerTri];
228         position[0].set(-1.0f, -1.0f);
229         position[1].set( 1.0f, -1.0f);
230         position[2].set( 1.0f,  1.0f);
231         viewMatrices[i].mapPointsWithStride(position, sizeof(SkPoint), kVerticesPerTri);
232 
233         // set colors
234         float color = i == kNumTri - 1 ? 1.0f : 0.0f;
235         uint32_t offset = i * kVerticesPerTri * 3;
236         for (uint32_t j = 0; j < kVerticesPerTri; j++) {
237             colors[offset++] = color; colors[offset++] = 0.0f; colors[offset++] = 0.0f;
238         }
239     }
240 
241     GrGLuint posVBO, colorVBO;
242     // setup position VBO
243     GR_GL_CALL(gl, GenBuffers(1, &posVBO));
244     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, posVBO));
245     GR_GL_CALL(gl, EnableVertexAttribArray(0));
246     GR_GL_CALL(gl, VertexAttribPointer(0, 2, GR_GL_FLOAT, GR_GL_FALSE, 2 * sizeof(GrGLfloat),
247                                        (GrGLvoid*)0));
248     GR_GL_CALL(gl, BufferData(GR_GL_ARRAY_BUFFER, sizeof(positions), positions, GR_GL_STATIC_DRAW));
249 
250     // setup color VBO
251     GR_GL_CALL(gl, GenBuffers(1, &colorVBO));
252     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, colorVBO));
253     GR_GL_CALL(gl, EnableVertexAttribArray(1));
254     GR_GL_CALL(gl, VertexAttribPointer(1, 3, GR_GL_FLOAT, GR_GL_FALSE, 3 * sizeof(GrGLfloat),
255                                        (GrGLvoid*)0));
256     GR_GL_CALL(gl, BufferData(GR_GL_ARRAY_BUFFER, sizeof(colors), colors, GR_GL_STATIC_DRAW));
257 
258     fBuffers.push_back(posVBO);
259     fBuffers.push_back(colorVBO);
260 }
261 
262 struct Vertex {
263     SkPoint fPositions;
264     GrGLfloat fColors[3];
265 };
266 
setupSingleVbo(const GrGLInterface * gl,const SkMatrix * viewMatrices)267 void GLCpuPosInstancedArraysBench::setupSingleVbo(const GrGLInterface* gl,
268                                                   const SkMatrix* viewMatrices) {
269     // Constants for our various shader programs
270     Vertex vertices[kVerticesPerTri * kNumTri];
271     for (uint32_t i = 0; i < kNumTri; i++) {
272         Vertex* v = &vertices[i * kVerticesPerTri];
273         v[0].fPositions.set(-1.0f, -1.0f);
274         v[1].fPositions.set( 1.0f, -1.0f);
275         v[2].fPositions.set( 1.0f,  1.0f);
276 
277         SkPoint* position = reinterpret_cast<SkPoint*>(v);
278         viewMatrices[i].mapPointsWithStride(position, sizeof(Vertex), kVerticesPerTri);
279 
280         // set colors
281         float color = i == kNumTri - 1 ? 1.0f : 0.0f;
282         for (uint32_t j = 0; j < kVerticesPerTri; j++) {
283             uint32_t offset = 0;
284             v->fColors[offset++] = color; v->fColors[offset++] = 0.0f; v->fColors[offset++] = 0.0f;
285             v++;
286         }
287     }
288 
289     GrGLuint vbo;
290     // setup VBO
291     GR_GL_CALL(gl, GenBuffers(1, &vbo));
292     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, vbo));
293     GR_GL_CALL(gl, EnableVertexAttribArray(0));
294     GR_GL_CALL(gl, EnableVertexAttribArray(1));
295     GR_GL_CALL(gl, VertexAttribPointer(0, 2, GR_GL_FLOAT, GR_GL_FALSE, sizeof(Vertex),
296                                        (GrGLvoid*)0));
297     GR_GL_CALL(gl, VertexAttribPointer(1, 3, GR_GL_FLOAT, GR_GL_FALSE, sizeof(Vertex),
298                                        (GrGLvoid*)(sizeof(SkPoint))));
299     GR_GL_CALL(gl, BufferData(GR_GL_ARRAY_BUFFER, sizeof(vertices), vertices, GR_GL_STATIC_DRAW));
300     fBuffers.push_back(vbo);
301 }
302 
setup(const GrGLContext * ctx)303 void GLCpuPosInstancedArraysBench::setup(const GrGLContext* ctx) {
304     const GrGLInterface* gl = ctx->interface();
305     fTexture = SetupFramebuffer(gl, kScreenWidth, kScreenHeight);
306 
307     fProgram = this->setupShader(ctx);
308 
309     // setup matrices
310     int index = 0;
311     SkMatrix viewMatrices[kNumTri];
312     setup_matrices(kNumTri, [&index, &viewMatrices](const SkMatrix& m) {
313         viewMatrices[index++] = m;
314     });
315 
316     // setup VAO
317     GR_GL_CALL(gl, GenVertexArrays(1, &fVAO));
318     GR_GL_CALL(gl, BindVertexArray(fVAO));
319 
320     switch (fVboSetup) {
321         case kUseOne_VboSetup:
322             this->setupSingleVbo(gl, viewMatrices);
323             break;
324         case kUseTwo_VboSetup:
325             this->setupDoubleVbo(gl, viewMatrices);
326             break;
327         case kUseInstance_VboSetup:
328             this->setupInstanceVbo(gl, viewMatrices);
329             break;
330     }
331 
332     // clear screen
333     GR_GL_CALL(gl, ClearColor(0.03f, 0.03f, 0.03f, 1.0f));
334     GR_GL_CALL(gl, Clear(GR_GL_COLOR_BUFFER_BIT));
335 
336     // set us up to draw
337     GR_GL_CALL(gl, UseProgram(fProgram));
338     GR_GL_CALL(gl, BindVertexArray(fVAO));
339 }
340 
glDraw(int loops,const GrGLContext * ctx)341 void GLCpuPosInstancedArraysBench::glDraw(int loops, const GrGLContext* ctx) {
342     const GrGLInterface* gl = ctx->interface();
343 
344     uint32_t maxTrianglesPerFlush = fDrawDiv == 0 ?  kNumTri :
345                                                      kDrawMultiplier / fDrawDiv;
346     uint32_t trianglesToDraw = loops * kDrawMultiplier;
347 
348     if (kUseInstance_VboSetup == fVboSetup) {
349         while (trianglesToDraw > 0) {
350             uint32_t triangles = SkTMin(trianglesToDraw, maxTrianglesPerFlush);
351             GR_GL_CALL(gl, DrawArraysInstanced(GR_GL_TRIANGLES, 0, kVerticesPerTri, triangles));
352             trianglesToDraw -= triangles;
353         }
354     } else {
355         while (trianglesToDraw > 0) {
356             uint32_t triangles = SkTMin(trianglesToDraw, maxTrianglesPerFlush);
357             GR_GL_CALL(gl, DrawArrays(GR_GL_TRIANGLES, 0, kVerticesPerTri * triangles));
358             trianglesToDraw -= triangles;
359         }
360     }
361 
362 #if 0
363     //const char* filename = "/data/local/tmp/out.png";
364     SkString filename("out");
365     filename.appendf("_%s.png", this->getName());
366     DumpImage(gl, kScreenWidth, kScreenHeight, filename.c_str());
367 #endif
368 }
369 
teardown(const GrGLInterface * gl)370 void GLCpuPosInstancedArraysBench::teardown(const GrGLInterface* gl) {
371     GR_GL_CALL(gl, BindBuffer(GR_GL_ARRAY_BUFFER, 0));
372     GR_GL_CALL(gl, BindVertexArray(0));
373     GR_GL_CALL(gl, BindTexture(GR_GL_TEXTURE_2D, 0));
374     GR_GL_CALL(gl, BindFramebuffer(GR_GL_FRAMEBUFFER, 0));
375     GR_GL_CALL(gl, DeleteTextures(1, &fTexture));
376     GR_GL_CALL(gl, DeleteProgram(fProgram));
377     GR_GL_CALL(gl, DeleteBuffers(fBuffers.count(), fBuffers.begin()));
378     GR_GL_CALL(gl, DeleteVertexArrays(1, &fVAO));
379     fBuffers.reset();
380 }
381 
382 ///////////////////////////////////////////////////////////////////////////////
383 
384 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseInstance_VboSetup, 0) )
385 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseOne_VboSetup, 0) )
386 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseTwo_VboSetup, 0) )
387 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseOne_VboSetup, 1) )
388 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseTwo_VboSetup, 1) )
389 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseOne_VboSetup, 2) )
390 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseTwo_VboSetup, 2) )
391 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseOne_VboSetup, 4) )
392 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseTwo_VboSetup, 4) )
393 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseOne_VboSetup, 8) )
394 DEF_BENCH( return new GLCpuPosInstancedArraysBench(GLCpuPosInstancedArraysBench::kUseTwo_VboSetup, 8) )
395 
396 #endif
397