1 // Copyright (C) 2019 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <random>
16 
17 #include <benchmark/benchmark.h>
18 
19 #include "src/trace_processor/tables/macros.h"
20 
21 namespace perfetto {
22 namespace trace_processor {
23 namespace {
24 
25 #define PERFETTO_TP_ROOT_TEST_TABLE(NAME, PARENT, C) \
26   NAME(RootTestTable, "root_table")                  \
27   PERFETTO_TP_ROOT_TABLE(PARENT, C)                  \
28   C(uint32_t, root_sorted, Column::Flag::kSorted)    \
29   C(uint32_t, root_non_null)                         \
30   C(uint32_t, root_non_null_2)                       \
31   C(base::Optional<uint32_t>, root_nullable)
32 
33 PERFETTO_TP_TABLE(PERFETTO_TP_ROOT_TEST_TABLE);
34 
35 #define PERFETTO_TP_CHILD_TABLE(NAME, PARENT, C)   \
36   NAME(ChildTestTable, "child_table")              \
37   PARENT(PERFETTO_TP_ROOT_TEST_TABLE, C)           \
38   C(uint32_t, child_sorted, Column::Flag::kSorted) \
39   C(uint32_t, child_non_null)                      \
40   C(base::Optional<uint32_t>, child_nullable)
41 
42 PERFETTO_TP_TABLE(PERFETTO_TP_CHILD_TABLE);
43 
44 RootTestTable::~RootTestTable() = default;
45 ChildTestTable::~ChildTestTable() = default;
46 
47 }  // namespace
48 }  // namespace trace_processor
49 }  // namespace perfetto
50 
51 namespace {
52 
IsBenchmarkFunctionalOnly()53 bool IsBenchmarkFunctionalOnly() {
54   return getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY") != nullptr;
55 }
56 
TableFilterArgs(benchmark::internal::Benchmark * b)57 void TableFilterArgs(benchmark::internal::Benchmark* b) {
58   if (IsBenchmarkFunctionalOnly()) {
59     b->Arg(1024);
60   } else {
61     b->RangeMultiplier(8);
62     b->Range(1024, 2 * 1024 * 1024);
63   }
64 }
65 
TableSortArgs(benchmark::internal::Benchmark * b)66 void TableSortArgs(benchmark::internal::Benchmark* b) {
67   if (IsBenchmarkFunctionalOnly()) {
68     b->Arg(64);
69   } else {
70     b->RangeMultiplier(8);
71     b->Range(1024, 256 * 1024);
72   }
73 }
74 
75 }  // namespace
76 
77 using perfetto::trace_processor::ChildTestTable;
78 using perfetto::trace_processor::RootTestTable;
79 using perfetto::trace_processor::RowMap;
80 using perfetto::trace_processor::SqlValue;
81 using perfetto::trace_processor::StringPool;
82 using perfetto::trace_processor::Table;
83 
BM_TableInsert(benchmark::State & state)84 static void BM_TableInsert(benchmark::State& state) {
85   StringPool pool;
86   RootTestTable root(&pool, nullptr);
87 
88   for (auto _ : state) {
89     benchmark::DoNotOptimize(root.Insert({}));
90   }
91 }
92 BENCHMARK(BM_TableInsert);
93 
BM_TableIteratorChild(benchmark::State & state)94 static void BM_TableIteratorChild(benchmark::State& state) {
95   StringPool pool;
96   RootTestTable root(&pool, nullptr);
97   ChildTestTable child(&pool, &root);
98 
99   uint32_t size = static_cast<uint32_t>(state.range(0));
100   for (uint32_t i = 0; i < size; ++i) {
101     child.Insert({});
102     root.Insert({});
103   }
104 
105   auto it = child.IterateRows();
106   for (auto _ : state) {
107     for (uint32_t i = 0; i < child.GetColumnCount(); ++i) {
108       benchmark::DoNotOptimize(it.Get(i));
109     }
110     it.Next();
111     if (!it)
112       it = child.IterateRows();
113   }
114 }
115 BENCHMARK(BM_TableIteratorChild)->Apply(TableFilterArgs);
116 
BM_TableFilterAndSortRoot(benchmark::State & state)117 static void BM_TableFilterAndSortRoot(benchmark::State& state) {
118   StringPool pool;
119   RootTestTable root(&pool, nullptr);
120 
121   uint32_t size = static_cast<uint32_t>(state.range(0));
122   uint32_t partitions = 8;
123 
124   std::minstd_rand0 rnd_engine(45);
125   for (uint32_t i = 0; i < size; ++i) {
126     RootTestTable::Row row;
127     row.root_non_null = rnd_engine() % partitions;
128     row.root_non_null_2 = static_cast<uint32_t>(rnd_engine());
129     root.Insert(row);
130   }
131 
132   for (auto _ : state) {
133     Table filtered = root.Filter({root.root_non_null().eq(5)},
134                                  RowMap::OptimizeFor::kLookupSpeed);
135     benchmark::DoNotOptimize(
136         filtered.Sort({root.root_non_null_2().ascending()}));
137   }
138 }
139 BENCHMARK(BM_TableFilterAndSortRoot)->Apply(TableFilterArgs);
140 
BM_TableFilterRootId(benchmark::State & state)141 static void BM_TableFilterRootId(benchmark::State& state) {
142   StringPool pool;
143   RootTestTable root(&pool, nullptr);
144 
145   uint32_t size = static_cast<uint32_t>(state.range(0));
146   for (uint32_t i = 0; i < size; ++i)
147     root.Insert({});
148 
149   for (auto _ : state) {
150     benchmark::DoNotOptimize(root.Filter({root.id().eq(30)}));
151   }
152 }
153 BENCHMARK(BM_TableFilterRootId)->Apply(TableFilterArgs);
154 
BM_TableFilterRootIdAndOther(benchmark::State & state)155 static void BM_TableFilterRootIdAndOther(benchmark::State& state) {
156   StringPool pool;
157   RootTestTable root(&pool, nullptr);
158 
159   uint32_t size = static_cast<uint32_t>(state.range(0));
160 
161   for (uint32_t i = 0; i < size; ++i) {
162     RootTestTable::Row root_row;
163     root_row.root_non_null = i * 4;
164     root.Insert(root_row);
165   }
166 
167   for (auto _ : state) {
168     benchmark::DoNotOptimize(root.Filter(
169         {root.id().eq(root.row_count() - 1), root.root_non_null().gt(100)}));
170   }
171 }
172 BENCHMARK(BM_TableFilterRootIdAndOther)->Apply(TableFilterArgs);
173 
BM_TableFilterChildId(benchmark::State & state)174 static void BM_TableFilterChildId(benchmark::State& state) {
175   StringPool pool;
176   RootTestTable root(&pool, nullptr);
177   ChildTestTable child(&pool, &root);
178 
179   uint32_t size = static_cast<uint32_t>(state.range(0));
180   for (uint32_t i = 0; i < size; ++i) {
181     root.Insert({});
182     child.Insert({});
183   }
184 
185   for (auto _ : state) {
186     benchmark::DoNotOptimize(child.Filter({child.id().eq(30)}));
187   }
188 }
189 BENCHMARK(BM_TableFilterChildId)->Apply(TableFilterArgs);
190 
BM_TableFilterChildIdAndSortedInRoot(benchmark::State & state)191 static void BM_TableFilterChildIdAndSortedInRoot(benchmark::State& state) {
192   StringPool pool;
193   RootTestTable root(&pool, nullptr);
194   ChildTestTable child(&pool, &root);
195 
196   uint32_t size = static_cast<uint32_t>(state.range(0));
197   for (uint32_t i = 0; i < size; ++i) {
198     RootTestTable::Row root_row;
199     root_row.root_sorted = i * 2;
200     root.Insert(root_row);
201 
202     ChildTestTable::Row child_row;
203     child_row.root_sorted = i * 2 + 1;
204     child.Insert(child_row);
205   }
206 
207   for (auto _ : state) {
208     benchmark::DoNotOptimize(
209         child.Filter({child.id().eq(30), child.root_sorted().gt(1024)}));
210   }
211 }
212 BENCHMARK(BM_TableFilterChildIdAndSortedInRoot)->Apply(TableFilterArgs);
213 
BM_TableFilterRootNonNullEqMatchMany(benchmark::State & state)214 static void BM_TableFilterRootNonNullEqMatchMany(benchmark::State& state) {
215   StringPool pool;
216   RootTestTable root(&pool, nullptr);
217 
218   uint32_t size = static_cast<uint32_t>(state.range(0));
219   uint32_t partitions = size / 1024;
220 
221   std::minstd_rand0 rnd_engine;
222   for (uint32_t i = 0; i < size; ++i) {
223     RootTestTable::Row row(static_cast<uint32_t>(rnd_engine() % partitions));
224     root.Insert(row);
225   }
226 
227   for (auto _ : state) {
228     benchmark::DoNotOptimize(root.Filter({root.root_non_null().eq(0)}));
229   }
230 }
231 BENCHMARK(BM_TableFilterRootNonNullEqMatchMany)->Apply(TableFilterArgs);
232 
BM_TableFilterRootMultipleNonNull(benchmark::State & state)233 static void BM_TableFilterRootMultipleNonNull(benchmark::State& state) {
234   StringPool pool;
235   RootTestTable root(&pool, nullptr);
236 
237   uint32_t size = static_cast<uint32_t>(state.range(0));
238   uint32_t partitions = size / 512;
239 
240   std::minstd_rand0 rnd_engine;
241   for (uint32_t i = 0; i < size; ++i) {
242     RootTestTable::Row row;
243     row.root_non_null = rnd_engine() % partitions;
244     row.root_non_null_2 = rnd_engine() % partitions;
245     root.Insert(row);
246   }
247 
248   for (auto _ : state) {
249     benchmark::DoNotOptimize(root.Filter(
250         {root.root_non_null().lt(4), root.root_non_null_2().lt(10)}));
251   }
252 }
253 BENCHMARK(BM_TableFilterRootMultipleNonNull)->Apply(TableFilterArgs);
254 
BM_TableFilterRootNullableEqMatchMany(benchmark::State & state)255 static void BM_TableFilterRootNullableEqMatchMany(benchmark::State& state) {
256   StringPool pool;
257   RootTestTable root(&pool, nullptr);
258 
259   uint32_t size = static_cast<uint32_t>(state.range(0));
260   uint32_t partitions = size / 512;
261 
262   std::minstd_rand0 rnd_engine;
263   for (uint32_t i = 0; i < size; ++i) {
264     uint32_t value = rnd_engine() % partitions;
265 
266     RootTestTable::Row row;
267     row.root_nullable = value % 2 == 0 ? perfetto::base::nullopt
268                                        : perfetto::base::make_optional(value);
269     root.Insert(row);
270   }
271 
272   for (auto _ : state) {
273     benchmark::DoNotOptimize(root.Filter({root.root_nullable().eq(1)}));
274   }
275 }
276 BENCHMARK(BM_TableFilterRootNullableEqMatchMany)->Apply(TableFilterArgs);
277 
BM_TableFilterChildNonNullEqMatchMany(benchmark::State & state)278 static void BM_TableFilterChildNonNullEqMatchMany(benchmark::State& state) {
279   StringPool pool;
280   RootTestTable root(&pool, nullptr);
281   ChildTestTable child(&pool, &root);
282 
283   uint32_t size = static_cast<uint32_t>(state.range(0));
284   uint32_t partitions = size / 1024;
285 
286   std::minstd_rand0 rnd_engine;
287   for (uint32_t i = 0; i < size; ++i) {
288     ChildTestTable::Row row;
289     row.child_non_null = static_cast<uint32_t>(rnd_engine() % partitions);
290     root.Insert({});
291     child.Insert(row);
292   }
293 
294   for (auto _ : state) {
295     benchmark::DoNotOptimize(child.Filter({child.child_non_null().eq(0)}));
296   }
297 }
298 BENCHMARK(BM_TableFilterChildNonNullEqMatchMany)->Apply(TableFilterArgs);
299 
BM_TableFilterChildNullableEqMatchMany(benchmark::State & state)300 static void BM_TableFilterChildNullableEqMatchMany(benchmark::State& state) {
301   StringPool pool;
302   RootTestTable root(&pool, nullptr);
303   ChildTestTable child(&pool, &root);
304 
305   uint32_t size = static_cast<uint32_t>(state.range(0));
306   uint32_t partitions = size / 512;
307 
308   std::minstd_rand0 rnd_engine;
309   for (uint32_t i = 0; i < size; ++i) {
310     uint32_t value = rnd_engine() % partitions;
311 
312     ChildTestTable::Row row;
313     row.child_nullable = value % 2 == 0 ? perfetto::base::nullopt
314                                         : perfetto::base::make_optional(value);
315     root.Insert({});
316     child.Insert(row);
317   }
318 
319   for (auto _ : state) {
320     benchmark::DoNotOptimize(child.Filter({child.child_nullable().eq(1)}));
321   }
322 }
323 BENCHMARK(BM_TableFilterChildNullableEqMatchMany)->Apply(TableFilterArgs);
324 
BM_TableFilterChildNonNullEqMatchManyInParent(benchmark::State & state)325 static void BM_TableFilterChildNonNullEqMatchManyInParent(
326     benchmark::State& state) {
327   StringPool pool;
328   RootTestTable root(&pool, nullptr);
329   ChildTestTable child(&pool, &root);
330 
331   uint32_t size = static_cast<uint32_t>(state.range(0));
332   uint32_t partitions = size / 1024;
333 
334   std::minstd_rand0 rnd_engine;
335   for (uint32_t i = 0; i < size; ++i) {
336     ChildTestTable::Row row;
337     row.root_non_null = static_cast<uint32_t>(rnd_engine() % partitions);
338     root.Insert({});
339     child.Insert(row);
340   }
341 
342   for (auto _ : state) {
343     benchmark::DoNotOptimize(child.Filter({child.root_non_null().eq(0)}));
344   }
345 }
346 BENCHMARK(BM_TableFilterChildNonNullEqMatchManyInParent)
347     ->Apply(TableFilterArgs);
348 
BM_TableFilterChildNullableEqMatchManyInParent(benchmark::State & state)349 static void BM_TableFilterChildNullableEqMatchManyInParent(
350     benchmark::State& state) {
351   StringPool pool;
352   RootTestTable root(&pool, nullptr);
353   ChildTestTable child(&pool, &root);
354 
355   uint32_t size = static_cast<uint32_t>(state.range(0));
356   uint32_t partitions = size / 512;
357 
358   std::minstd_rand0 rnd_engine;
359   for (uint32_t i = 0; i < size; ++i) {
360     ChildTestTable::Row row;
361     row.root_nullable = static_cast<uint32_t>(rnd_engine() % partitions);
362     root.Insert({});
363     child.Insert(row);
364   }
365 
366   for (auto _ : state) {
367     benchmark::DoNotOptimize(child.Filter({child.root_nullable().eq(1)}));
368   }
369 }
370 BENCHMARK(BM_TableFilterChildNullableEqMatchManyInParent)
371     ->Apply(TableFilterArgs);
372 
BM_TableFilterParentSortedEq(benchmark::State & state)373 static void BM_TableFilterParentSortedEq(benchmark::State& state) {
374   StringPool pool;
375   RootTestTable root(&pool, nullptr);
376 
377   uint32_t size = static_cast<uint32_t>(state.range(0));
378 
379   for (uint32_t i = 0; i < size; ++i) {
380     RootTestTable::Row row;
381     row.root_sorted = i * 2;
382     root.Insert(row);
383   }
384 
385   for (auto _ : state) {
386     benchmark::DoNotOptimize(root.Filter({root.root_sorted().eq(22)}));
387   }
388 }
389 BENCHMARK(BM_TableFilterParentSortedEq)->Apply(TableFilterArgs);
390 
BM_TableFilterParentSortedAndOther(benchmark::State & state)391 static void BM_TableFilterParentSortedAndOther(benchmark::State& state) {
392   StringPool pool;
393   RootTestTable root(&pool, nullptr);
394 
395   uint32_t size = static_cast<uint32_t>(state.range(0));
396 
397   for (uint32_t i = 0; i < size; ++i) {
398     // Group the rows into rows of 10. This emulates the behaviour of e.g.
399     // args.
400     RootTestTable::Row row;
401     row.root_sorted = (i / 10) * 10;
402     row.root_non_null = i;
403     root.Insert(row);
404   }
405 
406   // We choose to search for the last group as if there is O(n^2), it will
407   // be more easily visible.
408   uint32_t last_group = ((size - 1) / 10) * 10;
409   for (auto _ : state) {
410     benchmark::DoNotOptimize(root.Filter({root.root_sorted().eq(last_group),
411                                           root.root_non_null().eq(size - 1)}));
412   }
413 }
414 BENCHMARK(BM_TableFilterParentSortedAndOther)->Apply(TableFilterArgs);
415 
BM_TableFilterChildSortedEq(benchmark::State & state)416 static void BM_TableFilterChildSortedEq(benchmark::State& state) {
417   StringPool pool;
418   RootTestTable root(&pool, nullptr);
419   ChildTestTable child(&pool, &root);
420 
421   uint32_t size = static_cast<uint32_t>(state.range(0));
422 
423   for (uint32_t i = 0; i < size; ++i) {
424     ChildTestTable::Row row;
425     row.child_sorted = i * 2;
426     root.Insert({});
427     child.Insert(row);
428   }
429 
430   for (auto _ : state) {
431     benchmark::DoNotOptimize(child.Filter({child.child_sorted().eq(22)}));
432   }
433 }
434 BENCHMARK(BM_TableFilterChildSortedEq)->Apply(TableFilterArgs);
435 
BM_TableFilterChildSortedEqInParent(benchmark::State & state)436 static void BM_TableFilterChildSortedEqInParent(benchmark::State& state) {
437   StringPool pool;
438   RootTestTable root(&pool, nullptr);
439   ChildTestTable child(&pool, &root);
440 
441   uint32_t size = static_cast<uint32_t>(state.range(0));
442 
443   for (uint32_t i = 0; i < size; ++i) {
444     RootTestTable::Row root_row;
445     root_row.root_sorted = i * 4;
446     root.Insert(root_row);
447 
448     ChildTestTable::Row row;
449     row.root_sorted = i * 4 + 2;
450     child.Insert(row);
451   }
452 
453   for (auto _ : state) {
454     benchmark::DoNotOptimize(child.Filter({child.root_sorted().eq(22)}));
455   }
456 }
457 BENCHMARK(BM_TableFilterChildSortedEqInParent)->Apply(TableFilterArgs);
458 
BM_TableSortRootNonNull(benchmark::State & state)459 static void BM_TableSortRootNonNull(benchmark::State& state) {
460   StringPool pool;
461   RootTestTable root(&pool, nullptr);
462 
463   uint32_t size = static_cast<uint32_t>(state.range(0));
464 
465   std::minstd_rand0 rnd_engine;
466   for (uint32_t i = 0; i < size; ++i) {
467     const uint32_t root_value = static_cast<uint32_t>(rnd_engine());
468 
469     RootTestTable::Row row;
470     row.root_non_null = root_value;
471     root.Insert(row);
472   }
473 
474   for (auto _ : state) {
475     benchmark::DoNotOptimize(root.Sort({root.root_non_null().ascending()}));
476   }
477 }
478 BENCHMARK(BM_TableSortRootNonNull)->Apply(TableSortArgs);
479 
BM_TableSortRootNullable(benchmark::State & state)480 static void BM_TableSortRootNullable(benchmark::State& state) {
481   StringPool pool;
482   RootTestTable root(&pool, nullptr);
483 
484   uint32_t size = static_cast<uint32_t>(state.range(0));
485 
486   std::minstd_rand0 rnd_engine;
487   for (uint32_t i = 0; i < size; ++i) {
488     const uint32_t root_value = static_cast<uint32_t>(rnd_engine());
489 
490     RootTestTable::Row row;
491     row.root_nullable = root_value % 2 == 0
492                             ? perfetto::base::nullopt
493                             : perfetto::base::make_optional(root_value);
494     root.Insert(row);
495   }
496 
497   for (auto _ : state) {
498     benchmark::DoNotOptimize(root.Sort({root.root_nullable().ascending()}));
499   }
500 }
501 BENCHMARK(BM_TableSortRootNullable)->Apply(TableSortArgs);
502 
BM_TableSortChildNonNullInParent(benchmark::State & state)503 static void BM_TableSortChildNonNullInParent(benchmark::State& state) {
504   StringPool pool;
505   RootTestTable root(&pool, nullptr);
506   ChildTestTable child(&pool, &root);
507 
508   uint32_t size = static_cast<uint32_t>(state.range(0));
509 
510   std::minstd_rand0 rnd_engine;
511   for (uint32_t i = 0; i < size; ++i) {
512     const uint32_t root_value = static_cast<uint32_t>(rnd_engine());
513 
514     RootTestTable::Row root_row;
515     root_row.root_non_null = root_value;
516     root.Insert(root_row);
517 
518     const uint32_t child_value = static_cast<uint32_t>(rnd_engine());
519 
520     ChildTestTable::Row child_row;
521     child_row.root_non_null = child_value;
522     child.Insert(child_row);
523   }
524 
525   for (auto _ : state) {
526     benchmark::DoNotOptimize(child.Sort({child.root_non_null().ascending()}));
527   }
528 }
529 BENCHMARK(BM_TableSortChildNonNullInParent)->Apply(TableSortArgs);
530 
BM_TableSortChildNullableInParent(benchmark::State & state)531 static void BM_TableSortChildNullableInParent(benchmark::State& state) {
532   StringPool pool;
533   RootTestTable root(&pool, nullptr);
534   ChildTestTable child(&pool, &root);
535 
536   uint32_t size = static_cast<uint32_t>(state.range(0));
537 
538   std::minstd_rand0 rnd_engine;
539   for (uint32_t i = 0; i < size; ++i) {
540     const uint32_t root_value = static_cast<uint32_t>(rnd_engine());
541 
542     RootTestTable::Row root_row;
543     root_row.root_nullable = root_value % 2 == 0
544                                  ? perfetto::base::nullopt
545                                  : perfetto::base::make_optional(root_value);
546     root.Insert(root_row);
547 
548     const uint32_t child_value = static_cast<uint32_t>(rnd_engine());
549 
550     ChildTestTable::Row child_row;
551     child_row.root_nullable = child_value % 2 == 0
552                                   ? perfetto::base::nullopt
553                                   : perfetto::base::make_optional(child_value);
554     child.Insert(child_row);
555   }
556 
557   for (auto _ : state) {
558     benchmark::DoNotOptimize(child.Sort({child.root_nullable().ascending()}));
559   }
560 }
561 BENCHMARK(BM_TableSortChildNullableInParent)->Apply(TableSortArgs);
562