1 // Copyright (C) 2018 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <gtest/gtest.h>
16 #include <random>
17 
18 #include "benchmark/benchmark.h"
19 #include "perfetto/base/time.h"
20 #include "perfetto/traced/traced.h"
21 #include "perfetto/tracing/core/trace_config.h"
22 #include "perfetto/tracing/core/trace_packet.h"
23 #include "src/base/test/test_task_runner.h"
24 #include "test/task_runner_thread.h"
25 #include "test/task_runner_thread_delegates.h"
26 #include "test/test_helper.h"
27 
28 #include "perfetto/trace/trace_packet.pb.h"
29 #include "perfetto/trace/trace_packet.pbzero.h"
30 
31 namespace perfetto {
32 
33 namespace {
34 
// Reports whether the BENCHMARK_FUNCTIONAL_TEST_ONLY environment variable is
// present in the process environment. Only presence is checked; the value
// (even an empty one) is ignored.
bool IsBenchmarkFunctionalOnly() {
  const char* functional_only = getenv("BENCHMARK_FUNCTIONAL_TEST_ONLY");
  return functional_only != nullptr;
}
38 
BenchmarkProducer(benchmark::State & state)39 void BenchmarkProducer(benchmark::State& state) {
40   base::TestTaskRunner task_runner;
41 
42   TestHelper helper(&task_runner);
43   helper.StartServiceIfRequired();
44 
45   FakeProducer* producer = helper.ConnectFakeProducer();
46   helper.ConnectConsumer();
47   helper.WaitForConsumerConnect();
48 
49   TraceConfig trace_config;
50   trace_config.add_buffers()->set_size_kb(512);
51 
52   auto* ds_config = trace_config.add_data_sources()->mutable_config();
53   ds_config->set_name("android.perfetto.FakeProducer");
54   ds_config->set_target_buffer(0);
55 
56   static constexpr uint32_t kRandomSeed = 42;
57   uint32_t message_count = static_cast<uint32_t>(state.range(0));
58   uint32_t message_bytes = static_cast<uint32_t>(state.range(1));
59   uint32_t mb_per_s = static_cast<uint32_t>(state.range(2));
60 
61   uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
62   uint32_t time_for_messages_ms =
63       10000 + (messages_per_s == 0 ? 0 : message_count * 1000 / messages_per_s);
64 
65   ds_config->mutable_for_testing()->set_seed(kRandomSeed);
66   ds_config->mutable_for_testing()->set_message_count(message_count);
67   ds_config->mutable_for_testing()->set_message_size(message_bytes);
68   ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
69 
70   helper.StartTracing(trace_config);
71   helper.WaitForProducerEnabled();
72 
73   uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
74   uint64_t service_start_ns = helper.service_thread()->GetThreadCPUTimeNs();
75   uint64_t producer_start_ns = helper.producer_thread()->GetThreadCPUTimeNs();
76   uint32_t iterations = 0;
77   for (auto _ : state) {
78     auto cname = "produced.and.committed." + std::to_string(iterations++);
79     auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
80     producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
81     task_runner.RunUntilCheckpoint(cname, time_for_messages_ms);
82   }
83   uint64_t service_ns =
84       helper.service_thread()->GetThreadCPUTimeNs() - service_start_ns;
85   uint64_t producer_ns =
86       helper.producer_thread()->GetThreadCPUTimeNs() - producer_start_ns;
87   uint64_t wall_ns =
88       static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
89 
90   state.counters["Ser CPU"] = benchmark::Counter(100.0 * service_ns / wall_ns);
91   state.counters["Ser ns/m"] =
92       benchmark::Counter(1.0 * service_ns / message_count);
93   state.counters["Pro CPU"] = benchmark::Counter(100.0 * producer_ns / wall_ns);
94   state.SetBytesProcessed(iterations * message_bytes * message_count);
95 
96   // Read back the buffer just to check correctness.
97   helper.ReadData();
98   helper.WaitForReadData();
99 
100   bool is_first_packet = true;
101   std::minstd_rand0 rnd_engine(kRandomSeed);
102   for (const auto& packet : helper.trace()) {
103     ASSERT_TRUE(packet.has_for_testing());
104     if (is_first_packet) {
105       rnd_engine = std::minstd_rand0(packet.for_testing().seq_value());
106       is_first_packet = false;
107     } else {
108       ASSERT_EQ(packet.for_testing().seq_value(), rnd_engine());
109     }
110   }
111 }
112 
BenchmarkConsumer(benchmark::State & state)113 static void BenchmarkConsumer(benchmark::State& state) {
114   base::TestTaskRunner task_runner;
115 
116   TestHelper helper(&task_runner);
117   helper.StartServiceIfRequired();
118 
119   FakeProducer* producer = helper.ConnectFakeProducer();
120   helper.ConnectConsumer();
121   helper.WaitForConsumerConnect();
122 
123   TraceConfig trace_config;
124 
125   static const uint32_t kBufferSizeBytes =
126       IsBenchmarkFunctionalOnly() ? 16 * 1024 : 2 * 1024 * 1024;
127   trace_config.add_buffers()->set_size_kb(kBufferSizeBytes / 1024);
128 
129   static constexpr uint32_t kRandomSeed = 42;
130   uint32_t message_bytes = static_cast<uint32_t>(state.range(0));
131   uint32_t mb_per_s = static_cast<uint32_t>(state.range(1));
132   bool is_saturated_producer = mb_per_s == 0;
133 
134   uint32_t message_count = kBufferSizeBytes / message_bytes;
135   uint32_t messages_per_s = mb_per_s * 1024 * 1024 / message_bytes;
136   uint32_t number_of_batches =
137       is_saturated_producer ? 0 : std::max(1u, message_count / messages_per_s);
138 
139   auto* ds_config = trace_config.add_data_sources()->mutable_config();
140   ds_config->set_name("android.perfetto.FakeProducer");
141   ds_config->set_target_buffer(0);
142   ds_config->mutable_for_testing()->set_seed(kRandomSeed);
143   ds_config->mutable_for_testing()->set_message_count(message_count);
144   ds_config->mutable_for_testing()->set_message_size(message_bytes);
145   ds_config->mutable_for_testing()->set_max_messages_per_second(messages_per_s);
146 
147   helper.StartTracing(trace_config);
148   helper.WaitForProducerEnabled();
149 
150   uint64_t wall_start_ns = static_cast<uint64_t>(base::GetWallTimeNs().count());
151   uint64_t service_start_ns =
152       static_cast<uint64_t>(helper.service_thread()->GetThreadCPUTimeNs());
153   uint64_t consumer_start_ns =
154       static_cast<uint64_t>(base::GetThreadCPUTimeNs().count());
155   uint64_t read_time_taken_ns = 0;
156 
157   uint64_t iterations = 0;
158   uint32_t counter = 0;
159   for (auto _ : state) {
160     auto cname = "produced.and.committed." + std::to_string(iterations++);
161     auto on_produced_and_committed = task_runner.CreateCheckpoint(cname);
162     producer->ProduceEventBatch(helper.WrapTask(on_produced_and_committed));
163 
164     if (is_saturated_producer) {
165       // If the producer is running in saturated mode, wait until it flushes
166       // data.
167       task_runner.RunUntilCheckpoint(cname);
168 
169       // Then time how long it takes to read back the data.
170       int64_t start = base::GetWallTimeNs().count();
171       helper.ReadData(counter);
172       helper.WaitForReadData(counter++);
173       read_time_taken_ns +=
174           static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
175     } else {
176       // If the producer is not running in saturated mode, every second the
177       // producer will send a batch of data over. Wait for a second before
178       // performing readback; do this for each batch the producer sends.
179       for (uint32_t i = 0; i < number_of_batches; i++) {
180         auto batch_cname = "batch.checkpoint." + std::to_string(counter);
181         auto batch_checkpoint = task_runner.CreateCheckpoint(batch_cname);
182         task_runner.PostDelayedTask(batch_checkpoint, 1000);
183         task_runner.RunUntilCheckpoint(batch_cname);
184 
185         int64_t start = base::GetWallTimeNs().count();
186         helper.ReadData(counter);
187         helper.WaitForReadData(counter++);
188         read_time_taken_ns +=
189             static_cast<uint64_t>(base::GetWallTimeNs().count() - start);
190       }
191     }
192   }
193   uint64_t service_ns =
194       helper.service_thread()->GetThreadCPUTimeNs() - service_start_ns;
195   uint64_t consumer_ns =
196       static_cast<uint64_t>(base::GetThreadCPUTimeNs().count()) -
197       consumer_start_ns;
198   uint64_t wall_ns =
199       static_cast<uint64_t>(base::GetWallTimeNs().count()) - wall_start_ns;
200 
201   state.counters["Ser CPU"] = benchmark::Counter(100.0 * service_ns / wall_ns);
202   state.counters["Ser ns/m"] =
203       benchmark::Counter(1.0 * service_ns / message_count);
204   state.counters["Con CPU"] = benchmark::Counter(100.0 * consumer_ns / wall_ns);
205   state.counters["Con Speed"] =
206       benchmark::Counter(iterations * 1000.0 * 1000 * 1000 * kBufferSizeBytes /
207                          read_time_taken_ns);
208 }
209 
SaturateCpuProducerArgs(benchmark::internal::Benchmark * b)210 void SaturateCpuProducerArgs(benchmark::internal::Benchmark* b) {
211   int min_message_count = 16;
212   int max_message_count = IsBenchmarkFunctionalOnly() ? 1024 : 1024 * 1024;
213   int min_payload = 8;
214   int max_payload = IsBenchmarkFunctionalOnly() ? 256 : 2048;
215   for (int count = min_message_count; count <= max_message_count; count *= 2) {
216     for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
217       b->Args({count, bytes, 0 /* speed */});
218     }
219   }
220 }
221 
ConstantRateProducerArgs(benchmark::internal::Benchmark * b)222 void ConstantRateProducerArgs(benchmark::internal::Benchmark* b) {
223   int message_count = IsBenchmarkFunctionalOnly() ? 2 * 1024 : 128 * 1024;
224   int min_speed = IsBenchmarkFunctionalOnly() ? 64 : 8;
225   int max_speed = 128;
226   for (int speed = min_speed; speed <= max_speed; speed *= 2) {
227     b->Args({message_count, 128, speed});
228     b->Args({message_count, 256, speed});
229   }
230 }
231 
SaturateCpuConsumerArgs(benchmark::internal::Benchmark * b)232 void SaturateCpuConsumerArgs(benchmark::internal::Benchmark* b) {
233   int min_payload = 8;
234   int max_payload = IsBenchmarkFunctionalOnly() ? 16 : 64 * 1024;
235   for (int bytes = min_payload; bytes <= max_payload; bytes *= 2) {
236     b->Args({bytes, 0 /* speed */});
237   }
238 }
239 
ConstantRateConsumerArgs(benchmark::internal::Benchmark * b)240 void ConstantRateConsumerArgs(benchmark::internal::Benchmark* b) {
241   int min_speed = IsBenchmarkFunctionalOnly() ? 128 : 1;
242   int max_speed = IsBenchmarkFunctionalOnly() ? 128 : 2;
243   for (int speed = min_speed; speed <= max_speed; speed *= 2) {
244     b->Args({2, speed});
245     b->Args({4, speed});
246   }
247 }
248 
249 }  // namespace
250 
BM_EndToEnd_Producer_SaturateCpu(benchmark::State & state)251 static void BM_EndToEnd_Producer_SaturateCpu(benchmark::State& state) {
252   BenchmarkProducer(state);
253 }
254 
255 BENCHMARK(BM_EndToEnd_Producer_SaturateCpu)
256     ->Unit(benchmark::kMicrosecond)
257     ->UseRealTime()
258     ->Apply(SaturateCpuProducerArgs);
259 
BM_EndToEnd_Producer_ConstantRate(benchmark::State & state)260 static void BM_EndToEnd_Producer_ConstantRate(benchmark::State& state) {
261   BenchmarkProducer(state);
262 }
263 
264 BENCHMARK(BM_EndToEnd_Producer_ConstantRate)
265     ->Unit(benchmark::kMicrosecond)
266     ->UseRealTime()
267     ->Apply(ConstantRateProducerArgs);
268 
BM_EndToEnd_Consumer_SaturateCpu(benchmark::State & state)269 static void BM_EndToEnd_Consumer_SaturateCpu(benchmark::State& state) {
270   BenchmarkConsumer(state);
271 }
272 
273 BENCHMARK(BM_EndToEnd_Consumer_SaturateCpu)
274     ->Unit(benchmark::kMicrosecond)
275     ->UseRealTime()
276     ->Apply(SaturateCpuConsumerArgs);
277 
BM_EndToEnd_Consumer_ConstantRate(benchmark::State & state)278 static void BM_EndToEnd_Consumer_ConstantRate(benchmark::State& state) {
279   BenchmarkConsumer(state);
280 }
281 
282 BENCHMARK(BM_EndToEnd_Consumer_ConstantRate)
283     ->Unit(benchmark::kMillisecond)
284     ->UseRealTime()
285     ->Apply(ConstantRateConsumerArgs);
286 
287 }  // namespace perfetto
288