1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include "profiler.h"
17
18 #include <cutils/properties.h>
19 #include <hardware/google/camera/common/profiler/profiler.pb.h>
20 #include <log/log.h>
21 #include <sys/stat.h>
22
23 #include <fstream>
24 #include <list>
25 #include <mutex>
26 #include <unordered_map>
27 #include <vector>
28
29 namespace google {
30 namespace camera_common {
31 namespace {
32
33 #undef LOG_TAG
34 #define LOG_TAG "profiler"
35
StandardDeviation(std::vector<float> samples,float mean)36 float StandardDeviation(std::vector<float> samples, float mean) {
37 int size = samples.size();
38
39 double sum = 0;
40 for (int i = 0; i < size; i++) {
41 sum += pow((samples[i] - mean), 2);
42 }
43
44 return static_cast<float>(sqrt(sum / (size - 1)));
45 }
46
47 // Profiler implementatoin.
48 class ProfilerImpl : public Profiler {
49 public:
ProfilerImpl(SetPropFlag setting)50 ProfilerImpl(SetPropFlag setting) : setting_(setting) {
51 object_init_real_time_ = GetRealTimeNs();
52 object_init_boot_time_ = GetBootTimeNs();
53 };
54 ~ProfilerImpl();
55
56 // Setup the name of use case the profiler is running.
57 // Argument:
58 // usecase: the name use case of the profiler is running.
SetUseCase(std::string usecase)59 void SetUseCase(std::string usecase) override final {
60 use_case_ = std::move(usecase);
61 }
62
63 // Set the file prefix name for dumpping the profiling file.
64 // Argument:
65 // dump_file_prefix: file prefix name. In the current setting,
66 // "/data/vendor/camera/" is a valid folder for camera to dump file.
67 // A valid prefix can be "/data/vendor/camera/test_prefix_".
68 void SetDumpFilePrefix(const std::string& dump_file_prefix) override final;
69
70 // Start to profile.
71 // We use start and end to choose which code snippet to be profile.
72 // The user specifies the name, and the profiler will print the name and its
73 // timing.
74 // Arguments:
75 // name: the name of the node to be profiled.
76 // request_id: frame requesd id.
77 void Start(const std::string& name,
78 int request_id = kInvalidRequestId) override final;
79
80 // End the profileing.
81 // Arguments:
82 // name: the name of the node to be profiled. Should be the same in Start().
83 // request_id: frame requesd id.
84 void End(const std::string& name,
85 int request_id = kInvalidRequestId) override final;
86
87 // Print out the profiling result in the standard output (ANDROID_LOG_ERROR).
88 void PrintResult() override;
89
90 // Profile the frame rate
91 void ProfileFrameRate(const std::string&) override final;
92
93 // Set the interval of FPS print
94 // The unit is second and interval_seconds must >= 1
95 void SetFpsPrintInterval(int32_t interval_seconds) override final;
96
97 // Get the latency associated with the name
98 int64_t GetLatencyInNanoseconds(const std::string& name,
99 int request_id) override final;
100
101 protected:
102 // A structure to hold start time, end time, and count of profiling code
103 // snippet.
104 struct TimeSlot {
105 int64_t start = 0;
106 int64_t end = 0;
107 int32_t count = 0;
108 int32_t request_id = 0;
109 };
110
111 // A structure to store node's profiling result.
112 struct TimeResult {
113 std::string node_name;
114 float min_dt;
115 float max_dt;
116 float avg_dt;
117 float avg_count;
118 float fps;
119 float mean_max_stddevs;
TimeResultgoogle::camera_common::__anon477d26810111::ProfilerImpl::TimeResult120 TimeResult(std::string node_name, float min_dt, float max_dt, float avg_dt,
121 float count, float fps, float mean_max_stddevs)
122 : node_name(node_name),
123 min_dt(min_dt),
124 max_dt(max_dt),
125 avg_dt(avg_dt),
126 avg_count(count),
127 fps(fps),
128 mean_max_stddevs(mean_max_stddevs) {
129 }
130 };
131
132 using TimeSeries = std::vector<TimeSlot>;
133 using NodeTimingMap = std::unordered_map<std::string, TimeSeries>;
134 using NodeFrameRateMap = std::unordered_map<std::string, TimeSlot>;
135
136 static constexpr int64_t kNsPerSec = 1000000000;
137 static constexpr float kNanoToMilli = 0.000001f;
138
139 // The setting_ is used to memorize the getprop result.
140 SetPropFlag setting_;
141 // The map to record the timing of all nodes.
142 NodeTimingMap timing_map_;
143 // The map to record the timing to print fps when close.
144 NodeFrameRateMap frame_rate_map_;
145 // The map to record the timing to print fps per second.
146 NodeFrameRateMap realtime_frame_rate_map_;
147 // Use case name.
148 std::string use_case_;
149 // The prefix for the dump filename.
150 std::string dump_file_prefix_;
151 // Mutex lock.
152 std::mutex lock_;
153
154 // Get clock boot time.
GetBootTimeNs() const155 int64_t GetBootTimeNs() const {
156 if (timespec now; clock_gettime(CLOCK_BOOTTIME, &now) == 0) {
157 return now.tv_sec * kNsPerSec + now.tv_nsec;
158 } else {
159 ALOGE("clock_gettime failed");
160 return -1;
161 }
162 }
163 // Get clock real time.
GetRealTimeNs() const164 int64_t GetRealTimeNs() const {
165 if (timespec now; clock_gettime(CLOCK_REALTIME, &now) == 0) {
166 return now.tv_sec * kNsPerSec + now.tv_nsec;
167 } else {
168 ALOGE("clock_gettime failed");
169 return -1;
170 }
171 }
172
173 // Timestamp of the class object initialized using CLOCK_BOOTTIME.
174 int64_t object_init_boot_time_;
175 // Timestamp of the class object initialized using CLOCK_REALTIME.
176 int64_t object_init_real_time_;
177
178 // Create folder if not exist.
179 void CreateFolder(const std::string& folder_path);
180
181 // Dump the result to the disk.
182 // Argument:
183 // filepath: file path to dump file.
184 virtual void DumpResult(const std::string& filepath);
185
186 // Dump result in text format.
187 void DumpTxt(std::string_view filepath);
188
189 // Dump result in proto binary format.
190 void DumpPb(std::string_view filepath);
191
192 // Dump result format extension: proto or text.
193 constexpr static char kStrPb[] = ".pb";
194 constexpr static char kStrTxt[] = ".txt";
195
196 int32_t fps_print_interval_seconds_ = 1;
197 };
198
~ProfilerImpl()199 ProfilerImpl::~ProfilerImpl() {
200 if (setting_ == SetPropFlag::kDisable || timing_map_.size() == 0) {
201 return;
202 }
203 if (setting_ & SetPropFlag::kPrintBit) {
204 PrintResult();
205 }
206 if (setting_ & SetPropFlag::kDumpBit) {
207 std::string filename = std::to_string(object_init_real_time_);
208 DumpResult(dump_file_prefix_ + use_case_ + "-TS" + filename);
209 }
210 }
211
DumpResult(const std::string & filepath)212 void ProfilerImpl::DumpResult(const std::string& filepath) {
213 if (setting_ & SetPropFlag::kProto) {
214 DumpPb(filepath + kStrPb);
215 } else {
216 DumpTxt(filepath + kStrTxt);
217 }
218 }
219
CreateFolder(const std::string & folder_path)220 void ProfilerImpl::CreateFolder(const std::string& folder_path) {
221 struct stat folder_stat;
222 memset(&folder_stat, 0, sizeof(folder_stat));
223 if (stat(folder_path.c_str(), &folder_stat) != 0) {
224 if (errno != ENOENT ||
225 mkdir(folder_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
226 ALOGE("Failed to create %s. errno: %d", folder_path.c_str(), errno);
227 }
228 }
229 }
230
SetDumpFilePrefix(const std::string & dump_file_prefix)231 void ProfilerImpl::SetDumpFilePrefix(const std::string& dump_file_prefix) {
232 dump_file_prefix_ = dump_file_prefix;
233 if (setting_ & SetPropFlag::kDumpBit) {
234 if (auto index = dump_file_prefix_.rfind('/'); index != std::string::npos) {
235 CreateFolder(dump_file_prefix_.substr(0, index));
236 }
237 }
238 }
239
SetFpsPrintInterval(int32_t interval_seconds)240 void ProfilerImpl::SetFpsPrintInterval(int32_t interval_seconds) {
241 if (interval_seconds < 1) {
242 ALOGE("Wrong interval: %d, must >= 1", interval_seconds);
243 return;
244 }
245 fps_print_interval_seconds_ = interval_seconds;
246 }
247
ProfileFrameRate(const std::string & name)248 void ProfilerImpl::ProfileFrameRate(const std::string& name) {
249 std::lock_guard<std::mutex> lk(lock_);
250 // Save the timeing for each whole process
251 TimeSlot& frame_rate = frame_rate_map_[name];
252 if (frame_rate.start == 0) {
253 frame_rate.start = GetBootTimeNs();
254 frame_rate.count = 0;
255 frame_rate.end = 0;
256 } else {
257 ++frame_rate.count;
258 frame_rate.end = GetBootTimeNs();
259 }
260
261 if ((setting_ & SetPropFlag::kPrintFpsPerIntervalBit) == 0) {
262 return;
263 }
264 // Print FPS every second
265 TimeSlot& realtime_frame_rate = realtime_frame_rate_map_[name];
266 if (realtime_frame_rate.start == 0) {
267 realtime_frame_rate.start = GetBootTimeNs();
268 realtime_frame_rate.count = 0;
269 } else {
270 ++realtime_frame_rate.count;
271 int64_t current = GetBootTimeNs();
272 int64_t elapsed = current - realtime_frame_rate.start;
273 if (elapsed > kNsPerSec * fps_print_interval_seconds_) {
274 float fps =
275 realtime_frame_rate.count * kNsPerSec / static_cast<float>(elapsed);
276 float avg_fps = frame_rate.count * kNsPerSec /
277 static_cast<float>(frame_rate.end - frame_rate.start);
278 ALOGI("%s: current FPS %3.2f, avg %3.2f", name.c_str(), fps, avg_fps);
279 realtime_frame_rate.count = 0;
280 realtime_frame_rate.start = current;
281 }
282 }
283 }
284
Start(const std::string & name,int request_id)285 void ProfilerImpl::Start(const std::string& name, int request_id) {
286 if (setting_ == SetPropFlag::kDisable) {
287 return;
288 }
289
290 // When the request_id == kInvalidRequestId, it is served as a different
291 // purpose, eg. profiling first frame latency, or HAL total runtime. The valid
292 // request id is shifted by 1 to avoid the conflict.
293 int valid_request_id = (request_id == kInvalidRequestId) ? 0 : request_id + 1;
294
295 {
296 std::lock_guard<std::mutex> lk(lock_);
297 TimeSeries& time_series = timing_map_[name];
298 for (int i = time_series.size(); i <= valid_request_id; ++i) {
299 time_series.push_back(TimeSlot());
300 }
301 TimeSlot& slot = time_series[valid_request_id];
302 slot.request_id = valid_request_id;
303 slot.start += GetBootTimeNs();
304 }
305
306 if ((setting_ & SetPropFlag::kCalculateFpsOnEndBit) == 0) {
307 ProfileFrameRate(name);
308 }
309 }
310
End(const std::string & name,int request_id)311 void ProfilerImpl::End(const std::string& name, int request_id) {
312 if (setting_ == SetPropFlag::kDisable) {
313 return;
314 }
315
316 // When the request_id == kInvalidRequestId, it is served as a different
317 // purpose, eg. profiling first frame latency, or HAL total runtime. The valid
318 // request id is shifted by 1 to avoid the conflict.
319 int valid_request_id = (request_id == kInvalidRequestId) ? 0 : request_id + 1;
320
321 {
322 std::lock_guard<std::mutex> lk(lock_);
323 if (static_cast<std::size_t>(valid_request_id) < timing_map_[name].size()) {
324 TimeSlot& slot = timing_map_[name][valid_request_id];
325 slot.end += GetBootTimeNs();
326 ++slot.count;
327 }
328 }
329
330 if ((setting_ & SetPropFlag::kCalculateFpsOnEndBit) != 0) {
331 ProfileFrameRate(name);
332 }
333 }
334
PrintResult()335 void ProfilerImpl::PrintResult() {
336 ALOGI("UseCase: %s. Profiled Frames: %d.", use_case_.c_str(),
337 static_cast<int>(timing_map_.begin()->second.size()));
338
339 std::vector<TimeResult> time_results;
340
341 float sum_avg = 0.f;
342 float max_max = 0.f;
343 float sum_min = 0.f;
344 float sum_max = 0.f;
345 for (const auto& [node_name, time_series] : timing_map_) {
346 int num_frames = 0;
347 int num_samples = 0;
348 float sum_dt = 0.f;
349 float min_dt = std::numeric_limits<float>::max();
350 float max_dt = 0.f;
351 float mean_dt = 0.f;
352 std::vector<float> elapses;
353 for (const auto& slot : time_series) {
354 if (slot.count > 0) {
355 float elapsed = (slot.end - slot.start) * kNanoToMilli;
356 sum_dt += elapsed;
357 num_samples += slot.count;
358 min_dt = std::min(min_dt, elapsed);
359 max_dt = std::max(max_dt, elapsed);
360 num_frames++;
361 elapses.push_back(elapsed);
362 }
363 }
364 if (num_samples == 0) {
365 continue;
366 }
367 float avg = sum_dt / std::max(1, num_samples);
368 float avg_count = static_cast<float>(num_samples) /
369 static_cast<float>(std::max(1, num_frames));
370 mean_dt = avg * avg_count;
371 sum_avg += mean_dt;
372 sum_min += min_dt;
373 sum_max += max_dt;
374 max_max = std::max(max_max, max_dt);
375
376 // calculate StandardDeviation
377 float mean_max_stddevs = 0.f;
378 if (elapses.size() > 1) {
379 float dev_dt = StandardDeviation(elapses, mean_dt);
380 mean_max_stddevs = (max_dt - mean_dt) / dev_dt;
381 }
382
383 TimeSlot& frame_rate = frame_rate_map_[node_name];
384 int64_t duration = frame_rate.end - frame_rate.start;
385 float fps = 0;
386 if (duration > kNsPerSec) {
387 fps = frame_rate.count * kNsPerSec / static_cast<float>(duration);
388 }
389 time_results.push_back(
390 {node_name, min_dt, max_dt, mean_dt, avg_count, fps, mean_max_stddevs});
391 }
392
393 std::sort(time_results.begin(), time_results.end(),
394 [](auto a, auto b) { return a.avg_dt > b.avg_dt; });
395
396 for (const auto& result : time_results) {
397 if (result.fps == 0) {
398 ALOGI(
399 "%51.51s Min: %8.3f ms, Max: %8.3f ms, Avg: %7.3f ms "
400 "(Count = %3.1f), mean_max_stddevs: %6.2f, fps: NA",
401 result.node_name.c_str(), result.min_dt, result.max_dt, result.avg_dt,
402 result.avg_count, result.mean_max_stddevs);
403 } else {
404 ALOGI(
405 "%51.51s Min: %8.3f ms, Max: %8.3f ms, Avg: %7.3f ms "
406 "(Count = %3.1f), mean_max_stddevs: %6.2f, fps: %8.2f",
407 result.node_name.c_str(), result.min_dt, result.max_dt, result.avg_dt,
408 result.avg_count, result.mean_max_stddevs, result.fps);
409 }
410 }
411
412 ALOGI("%43.43s MIN SUM: %8.3f ms, MAX SUM: %8.3f ms, AVG SUM: %7.3f ms",
413 "", sum_min, sum_max, sum_avg);
414 ALOGI("");
415 }
416
DumpTxt(std::string_view filepath)417 void ProfilerImpl::DumpTxt(std::string_view filepath) {
418 // The dump result data is organized as 3 sections:
419 // 1. detla time and fps of each frame.
420 // 2. start time of each frame.
421 // 3. end time of each frame.
422 if (std::ofstream fout(filepath, std::ios::out); fout.is_open()) {
423 fout << "// PROFILER_DELTA_TIME_AND_FPS, UNIT:MILLISECOND //\n";
424 for (const auto& [node_name, time_series] : timing_map_) {
425 fout << node_name << " ";
426 for (const auto& time_slot : time_series) {
427 float elapsed = static_cast<float>(time_slot.end - time_slot.start) /
428 std::max(1, time_slot.count);
429 fout << elapsed * kNanoToMilli << " ";
430 }
431 fout << "\n";
432 TimeSlot& frame_rate = frame_rate_map_[node_name];
433 int64_t duration = frame_rate.end - frame_rate.start;
434 float fps = 0;
435 if (duration > kNsPerSec) {
436 fps = frame_rate.count * kNsPerSec / static_cast<float>(duration);
437 }
438 if (fps > 0) {
439 fout << node_name << " fps:" << fps;
440 } else {
441 fout << node_name << " fps: NA";
442 }
443 fout << "\n";
444 }
445
446 fout << "\n// PROFILER_START_TIME, AVG TIMESTAMP, UNIT:NANOSECOND //\n";
447 for (const auto& [node_name, time_series] : timing_map_) {
448 fout << node_name << " ";
449 for (const auto& time_slot : time_series) {
450 int64_t avg_time_stamp = time_slot.start / std::max(1, time_slot.count);
451 fout << avg_time_stamp << " ";
452 }
453 fout << "\n";
454 }
455
456 fout << "\n// PROFILER_END_TIME, AVG TIMESTAMP, UNIT:NANOSECOND //\n";
457 for (const auto& [node_name, time_series] : timing_map_) {
458 fout << node_name << " ";
459 for (const auto& time_slot : time_series) {
460 int64_t avg_time_stamp = time_slot.end / std::max(1, time_slot.count);
461 fout << avg_time_stamp << " ";
462 }
463 fout << "\n";
464 }
465 fout.close();
466 }
467 }
468
DumpPb(std::string_view filepath)469 void ProfilerImpl::DumpPb(std::string_view filepath) {
470 if (std::ofstream fout(filepath, std::ios::out); fout.is_open()) {
471 profiler::ProfilingResult profiling_result;
472 profiling_result.set_usecase(use_case_);
473 profiling_result.set_profile_start_time_nanos(object_init_real_time_);
474 profiling_result.set_profile_start_boottime_nanos(object_init_boot_time_);
475 profiling_result.set_profile_end_time_nanos(GetRealTimeNs());
476
477 for (const auto& [node_name, time_series] : timing_map_) {
478 profiler::TimeSeries& target = *profiling_result.add_target();
479 target.set_name(node_name);
480 for (const auto& time_slot : time_series) {
481 profiler::TimeStamp& time_stamp = *target.add_runtime();
482 // A single node can be called multiple times in a frame. Every time the
483 // node is called in the same frame, the profiler accumulates the
484 // timestamp value in time_slot.start/end, and increments the count.
485 // Therefore the result timestamp we stored is the `average` timestamp.
486 // Note: consider using minimum-start, and maximum-end.
487 time_stamp.set_start(time_slot.start / std::max(1, time_slot.count));
488 time_stamp.set_end(time_slot.end / std::max(1, time_slot.count));
489 time_stamp.set_count(time_slot.count);
490 time_stamp.set_request_id(time_slot.request_id);
491 }
492 }
493 profiling_result.SerializeToOstream(&fout);
494 fout.close();
495 }
496 }
497
498 // Get the latency associated with the name
GetLatencyInNanoseconds(const std::string & name,int request_id)499 int64_t ProfilerImpl::GetLatencyInNanoseconds(const std::string& name,
500 int request_id) {
501 // Will use name to add various TraceInt64 here
502 int valid_request_id = (request_id == kInvalidRequestId) ? 0 : request_id + 1;
503 int64_t latency_ns = 0;
504 {
505 std::lock_guard<std::mutex> lk(lock_);
506 if (static_cast<std::size_t>(valid_request_id) < timing_map_[name].size()) {
507 TimeSlot& slot = timing_map_[name][valid_request_id];
508 latency_ns = slot.end - slot.start;
509 }
510 }
511 return latency_ns;
512 }
513
514 class ProfilerStopwatchImpl : public ProfilerImpl {
515 public:
ProfilerStopwatchImpl(SetPropFlag setting)516 ProfilerStopwatchImpl(SetPropFlag setting) : ProfilerImpl(setting){};
517
~ProfilerStopwatchImpl()518 ~ProfilerStopwatchImpl() {
519 if (setting_ == SetPropFlag::kDisable || timing_map_.size() == 0) {
520 return;
521 }
522 if (setting_ & SetPropFlag::kPrintBit) {
523 // Virtual function won't work in parent class's destructor. need to
524 // call it by ourself.
525 PrintResult();
526 // Erase the print bit to prevent parent class print again.
527 setting_ = static_cast<SetPropFlag>(setting_ & (~SetPropFlag::kPrintBit));
528 }
529 if (setting_ & SetPropFlag::kDumpBit) {
530 DumpResult(dump_file_prefix_ + use_case_ + "-TS" +
531 std::to_string(object_init_real_time_) + ".txt");
532 setting_ = static_cast<SetPropFlag>(setting_ & (~SetPropFlag::kDumpBit));
533 }
534 }
535
536 // Print out the profiling result in the standard output (ANDROID_LOG_ERROR)
537 // with stopwatch mode.
PrintResult()538 void PrintResult() override {
539 ALOGI("Profiling Case: %s", use_case_.c_str());
540
541 // Sort by end time.
542 std::list<std::pair<std::string, TimeSlot>> time_results;
543 for (const auto& [node_name, time_series] : timing_map_) {
544 for (const auto& slot : time_series) {
545 if (slot.count > 0 && time_results.size() < time_results.max_size()) {
546 time_results.push_back({node_name, slot});
547 }
548 }
549 }
550 time_results.sort([](const auto& a, const auto& b) {
551 return a.second.end < b.second.end;
552 });
553
554 for (const auto& [node_name, slot] : time_results) {
555 if (slot.count > 0) {
556 float elapsed = (slot.end - slot.start) * kNanoToMilli;
557 ALOGI("%51.51s: %8.3f ms", node_name.c_str(), elapsed);
558 }
559 }
560
561 ALOGI("");
562 }
563
DumpResult(const std::string & filepath)564 void DumpResult(const std::string& filepath) override {
565 if (std::ofstream fout(filepath, std::ios::out); fout.is_open()) {
566 for (const auto& [node_name, time_series] : timing_map_) {
567 fout << node_name << " ";
568 for (const auto& slot : time_series) {
569 fout << (slot.end - slot.start) * kNanoToMilli << " ";
570 }
571 fout << "\n";
572 }
573 fout.close();
574 }
575 }
576 };
577
578 // Dummpy profiler class.
579 class ProfilerDummy : public Profiler {
580 public:
ProfilerDummy()581 ProfilerDummy(){};
~ProfilerDummy()582 ~ProfilerDummy(){};
583
SetUseCase(std::string)584 void SetUseCase(std::string) override final{};
SetDumpFilePrefix(const std::string &)585 void SetDumpFilePrefix(const std::string&) override final{};
Start(const std::string &,int)586 void Start(const std::string&, int) override final{};
End(const std::string &,int)587 void End(const std::string&, int) override final{};
PrintResult()588 void PrintResult() override final{};
ProfileFrameRate(const std::string &)589 void ProfileFrameRate(const std::string&) override final{};
SetFpsPrintInterval(int32_t)590 void SetFpsPrintInterval(int32_t) override final{};
GetLatencyInNanoseconds(const std::string &,int)591 int64_t GetLatencyInNanoseconds(const std::string&, int) override final {
592 return 0;
593 }
594 };
595
596 } // anonymous namespace
597
Create(int option)598 std::shared_ptr<Profiler> Profiler::Create(int option) {
599 SetPropFlag flag = static_cast<SetPropFlag>(option);
600
601 if (flag == SetPropFlag::kDisable) {
602 return std::make_shared<ProfilerDummy>();
603 } else if (flag & SetPropFlag::kStopWatch) {
604 return std::make_shared<ProfilerStopwatchImpl>(flag);
605 } else {
606 return std::make_shared<ProfilerImpl>(flag);
607 }
608 }
609
610 } // namespace camera_common
611 } // namespace google
612